Init basic cluster
.gitignore (vendored, new file, 2 lines)
@@ -0,0 +1,2 @@
secrets.yaml
talos/generated
.gitmodules (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
[submodule "openwrt/roles/ansible-openwrt"]
	path = openwrt/roles/ansible-openwrt
	url = https://github.com/gekmihesg/ansible-openwrt.git
.vscode/extensions.json (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
{
  "recommendations": ["arrterian.nix-env-selector", "jnoortheen.nix-ide"]
}
.vscode/settings.json (vendored, new file, 12 lines)
@@ -0,0 +1,12 @@
{
  "nixEnvSelector.nixFile": "${workspaceFolder}/shell.nix",
  "terminal.integrated.profiles.linux": {
    "Nix Shell": {
      "path": "nix",
      "args": ["develop"],
      "icon": "terminal-linux"
    }
  },
  "terminal.integrated.defaultProfile.linux": "Nix Shell",
  "ansible.python.interpreterPath": "/bin/python"
}
Makefile (new file, 12 lines)
@@ -0,0 +1,12 @@
install-router:
	ansible-playbook ansible/playbook.yml -i ansible/hosts

gen-talos-config:
	mkdir -p talos/generated
	talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --config-patch @talos/patches/zoma-dibaiyin.patch --output-types controlplane -o talos/generated/zoma-dibaiyin.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl gen config --with-secrets secrets.yaml --config-patch @talos/patches/controlplane.patch --output-types worker -o talos/generated/worker.yaml homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl gen config --with-secrets secrets.yaml --output-types talosconfig -o talos/generated/talosconfig homelab https://kube-api.homelab.lumpiasty.xyz:6443
	talosctl config endpoint kube-api.homelab.lumpiasty.xyz

apply-talos-config:
	talosctl -n zoma-dibaiyin apply-config -f talos/generated/zoma-dibaiyin.yaml
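When iterating on the patches it can help to preview what a regenerated config would change before applying it. A minimal sketch; `--dry-run` makes talosctl print the pending machine-config diff without touching the node:

```
talosctl -n zoma-dibaiyin apply-config --dry-run -f talos/generated/zoma-dibaiyin.yaml
```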
README.md (new file, 106 lines)
@@ -0,0 +1,106 @@
# Homelab

## Goals

Setting up a homelab Kubernetes cluster.

### Software

1. Running applications
   1. NAS, backups, security recorder
   2. Online presence: website, email, communicators (TS3, Matrix?)
   3. Git server, container registry
   4. Environment to deploy my own apps
   5. Some LLM server and apps for my own use
   6. Public services like Tor, mirrors of Linux distros, etc.
   7. [Some frontends](https://libredirect.github.io/)
   8. [Awesome-Selfhosted](https://github.com/awesome-selfhosted/awesome-selfhosted), [Awesome Sysadmin](https://github.com/awesome-foss/awesome-sysadmin)
2. Managing them, hopefully using GitOps
   1. FluxCD, Argo, etc.
   2. State of the cluster in git, all apps version-pinned
   3. Some bot to inform about updates?
3. It's a home**lab**
   1. Should be open to experimenting
   2. Avoiding vendor lock-in; changing my mind shouldn't block me for too long
   3. Backups of important data in an easy-to-access format
   4. Expecting downtime, no critical workloads
   5. Trying to keep it reasonably up anyway

### Infrastructure

1. Using commodity hardware
2. Reasonably scalable
3. Preferably mobile workloads; software should be a bit more flexible than me moving disks and data around
4. Replication is overkill for most data
5. Preferably a dynamically configured network
   1. BGP with the OpenWRT router
   2. Dynamically allocated host subnets
   3. Load balancing (MetalLB?), ECMP on the router
   4. Static IP configuration on nodes
6. IPv6-native, IPv4-accessible
   1. A whole IPv6 block is routed to us, which gives us control over address routing and usage
   2. This lets us expose services directly to the internet without complex router configuration
   3. It also lets us use e.g. ExternalDNS to autoconfigure domain names for load balancers
   4. But the majority of the world still runs IPv4, which should be supported for public services
   5. Exposing an IPv4 service may require extra router reconfiguration, port forwarding, and manual domain setup (or, some day, a controller doing this)
   6. One public IPv4 address probably means extensive use of rule-based ingress controllers
   7. IPv6 internet traffic from pods should not be NATed
   8. IPv4 internet traffic from pods should be NATed by the router

### Current implementation idea

1. Cluster server nodes running Talos
2. OpenWRT router
   1. VLAN / virtual interface for the cluster
   2. Configured using Ansible
   3. Peering with the cluster using BGP
   4. Load balancing using ECMP
3. Cluster networking
   1. Cilium CNI
   2. Native routing, no encapsulation or overlay
   3. Using Cilium's network policies for firewall needs
   4. IPv6 address pools
      1. Nodes: 2001:470:61a3:100::/64
      2. Pods: 2001:470:61a3:200::/64
      3. Services: 2001:470:61a3:300::/112
      4. Load balancer: 2001:470:61a3:400::/112
   5. IPv4 address pools
      1. Nodes: 192.168.1.32/27
      2. Pods: 10.42.0.0/16
      3. Services: 10.43.0.0/16
      4. Load balancer: 10.44.0.0/16
4. Storage
   1. OS installed on a dedicated disk
   2. Mayastor managing all data disks
      1. A DiskPool for each data disk in the cluster, labelled by type (SSD or HDD)
      2. A StorageClass for each topology need (type, whether to replicate, on which node, etc.)

## Working with the repo

The repo is preconfigured for use with Nix and VS Code.

Install Nix; VS Code should pick up the settings and launch terminals in `nix develop` with all needed utilities.

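Outside VS Code, the same environment can be entered by hand; a minimal sketch:

```
# Enter the dev shell from flake.nix (talosctl, kubectl, helm, ansible, ...)
nix develop

# Or via the flake-compat shim in shell.nix, for tooling that expects classic nix
nix-shell
```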
## Bootstrapping the cluster

1. Configure OpenWRT, create a dedicated interface for connecting the server
   1. Set up the node subnet and routing
   2. Create a static host entry `kube-api.homelab.lumpiasty.xyz` pointing at the IPv6 address of the first node
2. Connect the server
3. Grab a Talos ISO and dd it to a USB stick
4. Boot it and, using the keyboard, set a static IPv6 address in the node subnet; the node should become reachable from the PC
5. `talosctl gen config homelab https://kube-api.homelab.lumpiasty.xyz:6443`
6. Generate secrets with `talosctl gen secrets`; **back up and keep `secrets.yaml` safe**
7. Generate the config files: `make gen-talos-config`
8. Apply the config to the first node: `talosctl apply-config --insecure -n 2001:470:61a3:100::2 -f talos/generated/zoma-dibaiyin.yaml`
9. Wait for the reboot, then `talosctl bootstrap --talosconfig=talos/generated/talosconfig -n 2001:470:61a3:100::2`
10. Set up the router and CNI, then run the sanity check sketched below

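A quick post-bootstrap sanity check; a sketch assuming `TALOSCONFIG` from the dev shell points at `talos/generated/talosconfig`:

```
# Pull a kubeconfig from the fresh control plane and check the node registered
# (it stays NotReady until Cilium is installed in step 10)
talosctl -n 2001:470:61a3:100::2 kubeconfig
kubectl get nodes -o wide
```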
## Updating Talos config

Update the patches, then re-generate and apply the configs:

```
make gen-talos-config
make apply-talos-config
```
ansible/hosts (new file, 2 lines)
@@ -0,0 +1,2 @@
[openwrt]
2001:470:61a3:100:ffff:ffff:ffff:ffff ansible_scp_extra_args="-O"
ansible/playbook.yml (new file, 6 lines)
@@ -0,0 +1,6 @@
- name: Configure router
  hosts: openwrt
  remote_user: root
  roles:
    - ansible-openwrt
    - router
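Before changing a live router it is worth seeing what the play would do; a sketch using standard ansible-playbook flags:

```
# Report pending changes without applying them
ansible-playbook ansible/playbook.yml -i ansible/hosts --check --diff
```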
ansible/roles/router/files/bird.conf (new file, 54 lines)
@@ -0,0 +1,54 @@
# Would never work without this awesome blogpost:
# https://farcaller.net/2024/making-cilium-bgp-work-with-ipv6/

log "/tmp/bird.log" all;
log syslog all;

# Router ID
router id 192.168.1.1;

protocol kernel kernel4 {
    learn;
    scan time 10;
    merge paths yes;
    ipv4 {
        # Import only the default route; we're the nodes' default gateway, so they don't need the rest
        import filter { if net = 0.0.0.0/0 then { igp_metric = 100; accept; } reject; };
        export all;
    };
}

protocol kernel kernel6 {
    learn;
    scan time 10;
    merge paths yes;
    ipv6 {
        import filter { if net = ::/0 then { igp_metric = 100; accept; } reject; };
        export all;
    };
}

protocol device {
    scan time 10;
}

protocol direct {
    interface "*";
}

protocol bgp homelab {
    debug { events };
    passive;
    direct;
    local 2001:470:61a3:100:ffff:ffff:ffff:ffff as 65000;
    neighbor range 2001:470:61a3:100::/64 as 65000;
    ipv4 {
        extended next hop yes;
        import all;
        export all;
    };
    ipv6 {
        import all;
        export all;
    };
}
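On the router, the BIRD CLI (from the `bird2c` package installed by the role below) can confirm that sessions come up and routes are learned; a minimal sketch:

```
# Protocol states, including dynamic peers matched by `neighbor range`
birdc show protocols

# Routes learned via BGP from the cluster nodes
birdc show route
```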
ansible/roles/router/handlers/main.yml (new file, 5 lines)
@@ -0,0 +1,5 @@
- name: Restart bird
  service:
    name: bird
    state: restarted
    enabled: true
ansible/roles/router/tasks/main.yml (new file, 16 lines)
@@ -0,0 +1,16 @@
---
- name: Install bird2
  opkg:
    name: "{{ item }}"
    state: present
  # Workaround for the opkg module not handling multiple names at once well
  loop:
    - bird2
    - bird2c

- name: Set up bird.conf
  ansible.builtin.copy:
    src: bird.conf
    dest: /etc/bird.conf
    mode: "0644"
  notify: Restart bird
flake.lock (generated, new file, 42 lines)
@@ -0,0 +1,42 @@
{
  "nodes": {
    "flake-compat": {
      "locked": {
        "lastModified": 1733328505,
        "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
        "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
        "revCount": 69,
        "type": "tarball",
        "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.1.0/01948eb7-9cba-704f-bbf3-3fa956735b52/source.tar.gz"
      },
      "original": {
        "type": "tarball",
        "url": "https://flakehub.com/f/edolstra/flake-compat/1.1.0.tar.gz"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1738142207,
        "narHash": "sha256-NGqpVVxNAHwIicXpgaVqJEJWeyqzoQJ9oc8lnK9+WC4=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "9d3ae807ebd2981d593cddd0080856873139aa40",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-compat": "flake-compat",
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}
flake.nix (new file, 35 lines)
@@ -0,0 +1,35 @@
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";

    # Only to ease updating flake.lock; flake-compat itself is used by shell.nix
    flake-compat.url = "https://flakehub.com/f/edolstra/flake-compat/1.1.0.tar.gz";
  };

  outputs = { self, nixpkgs, ... }: let
    system = "x86_64-linux";
  in {
    devShells."${system}".default =
      let
        pkgs = import nixpkgs {
          inherit system;
        };
      in
      pkgs.mkShell {
        packages = with pkgs; [
          vim gnumake
          talosctl cilium-cli
          kubectl kubectx k9s kubernetes-helm
          ansible
        ];

        shellHook = ''
          # Get completions working
          source ${pkgs.bash-completion}/share/bash-completion/bash_completion

          export TALOSCONFIG=$(pwd)/talos/generated/talosconfig
          export EDITOR=vim
        '';
      };
  };
}
infra/cilium/Makefile (new file, 5 lines)
@@ -0,0 +1,5 @@
install:
	helm repo add cilium https://helm.cilium.io/
	helm repo update cilium
	helm upgrade --install -n kube-system cilium cilium/cilium --version 1.16.6 -f values.yml
	kubectl apply -f loadbalancer-ippool.yml -f bgp-cluster-config.yml
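After `make install`, the cilium CLI from the dev shell can wait for the rollout to settle; a sketch:

```
# Block until all Cilium components report ready
cilium status --wait
```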
infra/cilium/bgp-cluster-config.yml (new file, 62 lines)
@@ -0,0 +1,62 @@
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPClusterConfig
metadata:
  name: router
spec:
  bgpInstances:
    - name: "main"
      localASN: 65000
      peers:
        - name: barracuda
          peerASN: 65000
          peerAddress: 2001:470:61a3:100:ffff:ffff:ffff:ffff
          peerConfigRef:
            name: cilium-peer
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeerConfig
metadata:
  name: cilium-peer
spec:
  timers:
    keepAliveTimeSeconds: 3
    holdTimeSeconds: 9
    connectRetryTimeSeconds: 12
  gracefulRestart:
    enabled: true
    restartTimeSeconds: 60
  families:
    - afi: ipv4
      safi: unicast
      advertisements:
        matchLabels:
          advertise: bgp
    - afi: ipv6
      safi: unicast
      advertisements:
        matchLabels:
          advertise: bgp
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPAdvertisement
metadata:
  name: bgp-advertisements
  labels:
    advertise: bgp
spec:
  advertisements:
    - advertisementType: "CiliumPodIPPool"
      # We want to advertise all pools, hence a match-everything selector
      # https://docs.cilium.io/en/v1.16/network/bgp-control-plane/bgp-control-plane-v2/#multipool-ipam
      selector:
        matchExpressions:
          - { key: somekey, operator: NotIn, values: ["never-used-value"] }
    - advertisementType: Service
      service:
        addresses:
          # Not advertising ClusterIPs: kube-proxy replacement should translate them directly to pod IPs
          # Not advertising ExternalIPs: they should be reachable via static config
          - LoadBalancerIP
      selector:
        matchExpressions:
          - { key: somekey, operator: NotIn, values: ["never-used-value"] }
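Whether the peering actually establishes can be checked from the cluster side; a sketch:

```
# Per-node BGP session state and route counts toward the router
cilium bgp peers
```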
infra/cilium/loadbalancer-ippool.yml (new file, 8 lines)
@@ -0,0 +1,8 @@
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
  name: "blue-pool"
spec:
  blocks:
    - cidr: 10.44.0.0/16
    - cidr: 2001:470:61a3:400::/112
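A hypothetical smoke test for the pool (the `echo` deployment name and image are illustrative):

```
kubectl create deployment echo --image=nginx
kubectl expose deployment echo --port=80 --type=LoadBalancer
# EXTERNAL-IP should be assigned from 10.44.0.0/16 or 2001:470:61a3:400::/112
kubectl get svc echo --watch
```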
infra/cilium/values.yml (new file, 59 lines)
@@ -0,0 +1,59 @@
# Talos specific
# https://www.talos.dev/v1.9/kubernetes-guides/network/deploying-cilium/
kubeProxyReplacement: true

securityContext:
  capabilities:
    ciliumAgent:
      - CHOWN
      - KILL
      - NET_ADMIN
      - NET_RAW
      - IPC_LOCK
      - SYS_ADMIN
      - SYS_RESOURCE
      - DAC_OVERRIDE
      - FOWNER
      - SETGID
      - SETUID
    cleanCiliumState:
      - NET_ADMIN
      - SYS_ADMIN
      - SYS_RESOURCE

cgroup:
  autoMount:
    enabled: false
  hostRoot: /sys/fs/cgroup

k8sServiceHost: localhost
k8sServicePort: 7445

# Homelab
routingMode: native
ipv6:
  enabled: true

# Enable managing IP pools via CRDs
ipam:
  mode: multi-pool
  operator:
    autoCreateCiliumPodIPPools:
      default:
        # In line with talos/patches/controlplane.patch
        ipv4:
          cidrs:
            - 10.42.0.0/16
          maskSize: 24
        ipv6:
          cidrs:
            - 2001:470:61a3:200::/104
          maskSize: 120

# Disable masquerading; IPv4 NAT is done at the router level
enableIPv4Masquerade: false
enableIPv6Masquerade: false

# Enable peering and announcing routes via BGP
bgpControlPlane:
  enabled: true
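With multi-pool IPAM, the operator is expected to auto-create the `default` pod IP pool declared above; a sketch for checking (assumes the CiliumPodIPPool CRD shipped with this Cilium version):

```
# The auto-created pod IP pool and its CIDRs
kubectl get ciliumpodippools.cilium.io
```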
shell.nix (new file, 15 lines)
@@ -0,0 +1,15 @@
# Needed for Nix Environment Selector
# https://github.com/edolstra/flake-compat/
(import
  (
    let
      lock = builtins.fromJSON (builtins.readFile ./flake.lock);
      nodeName = lock.nodes.root.inputs.flake-compat;
    in
    fetchTarball {
      url = lock.nodes.${nodeName}.locked.url;
      sha256 = lock.nodes.${nodeName}.locked.narHash;
    }
  )
  { src = ./.; }
).shellNix
talos/patches/controlplane.patch (new file, 34 lines)
@@ -0,0 +1,34 @@
machine:
  certSANs:
    - kube-api.homelab.lumpiasty.xyz

  network:
    nameservers:
      - 2001:470:61a3:100:ffff:ffff:ffff:ffff
      - 192.168.1.1

    searchDomains:
      - homelab-infra.lumpiasty.xyz

  features:
    hostDNS:
      forwardKubeDNSToHost: false

cluster:
  network:
    # Likely redundant; we use Cilium as IPAM with its CRDs
    podSubnets:
      - 10.42.0.0/16
      - 2001:470:61a3:200::/64
    # Less likely redundant
    serviceSubnets:
      - 10.43.0.0/16
      - 2001:470:61a3:300::/112
  # We install Cilium manually
  cni:
    name: none
  # We use Cilium's kube-proxy replacement
  proxy:
    disabled: true
  # Too poor for dedicated control-plane machines
  allowSchedulingOnControlPlanes: true
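The generated, patched configs can be validated offline before touching a node; a sketch (`--mode metal` matches a bare-metal install):

```
talosctl validate --config talos/generated/zoma-dibaiyin.yaml --mode metal
```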
talos/patches/zoma-dibaiyin.patch (new file, 17 lines)
@@ -0,0 +1,17 @@
machine:
  network:
    interfaces:
      - interface: enp0s31f6
        addresses:
          - 2001:470:61a3:100::2/64
          - 192.168.1.34/24
        routes:
          - network: ::/0
            gateway: 2001:470:61a3:100:ffff:ffff:ffff:ffff
          - network: 0.0.0.0/0
            gateway: 192.168.1.1
        mtu: 1500

  install:
    diskSelector:
      wwid: t10.ATA Patriot Blast C26D07641DCF00006955
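The `wwid` for `diskSelector` can be read off a node booted from the ISO; a sketch (maintenance mode, hence `--insecure`):

```
# List disks and their WWIDs to pick the install target
talosctl -n 2001:470:61a3:100::2 get disks --insecure
```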