lte failover
ci/woodpecker/push/flux-reconcile-source Pipeline was successful
ci/woodpecker/cron/renovate Pipeline was successful

This commit is contained in:
2026-05-27 23:40:33 +02:00
parent 754c8952bc
commit 5b026593ce
15 changed files with 761 additions and 111 deletions
+2 -4
View File
@@ -10,10 +10,6 @@ openwrt_mgmt_ip: 192.168.255.11
openwrt_mgmt_prefix: 24
openwrt_mgmt_gateway: 192.168.255.10
# DNS servers for the AP itself
openwrt_dns_servers:
- 192.168.0.1
# SSH authorised keys (list of public key strings)
openwrt_ssh_authorized_keys: []
@@ -26,4 +22,6 @@ openwrt_ntp_servers:
openwrt_packages:
- usb-modeswitch # switches embedded LTE modem (Qualcomm 05c6:9008) from EDL to QMI mode on boot
- luci-proto-qmi # adds QMI protocol support to LuCI for configuring the embedded LTE modem
- bird2 # BGP daemon — peers with CRS for LTE failover route signalling
- bird2c # Control CLI interface for BGP daemon
+5
View File
@@ -12,3 +12,8 @@
# Runs `wifi reload` on the device; meant to be triggered via `notify`
# from tasks that change wireless configuration.
- name: Reload wireless
community.openwrt.command:
cmd: wifi reload
# Notified when /etc/bird.conf changes. Asks the running BIRD daemon to
# re-read its configuration instead of restarting it: a full restart
# drops the iBGP session to the CRS and withdraws every BGP-installed
# kernel route until the session re-establishes, whereas a reload only
# touches protocols whose configuration actually changed.
# NOTE(review): relies on the bird init script implementing `reload`
# (SIGHUP to the daemon) and on the service module accepting
# `state: reloaded` — confirm on the target before merging.
- name: Reload bird
  community.openwrt.service:
    name: bird
    state: reloaded
+153
View File
@@ -0,0 +1,153 @@
---
# Configures BIRD2 on the D-Link as an iBGP peer of the MikroTik CRS418.
#
# Route exchange:
# D-Link → CRS: announces 0.0.0.0/0 and 2000::/3 when wwan0 is up.
# CRS installs these at BGP distance 200 (below the GPON
# static default at distance 1 — activates only on GPON failure).
#
# CRS → D-Link: announces connected routes (VLAN subnets), static routes
# (Tailscale, GPON default), and reflects k8s BGP routes.
# BIRD2 installs all of these into the kernel at metric 10.
#
# D-Link's own routing:
# - Kernel metric 10: BGP-learned routes from CRS (preferred)
# - Kernel metric 100: wwan QMI-assigned routes (fallback)
# No static default gateway on uplink — the default comes from BGP.
# When GPON fails, CRS withdraws the BGP default; D-Link falls back to wwan.
# Writes the complete BIRD2 configuration to /etc/bird.conf on the device.
# A change to the file notifies the "Reload bird" handler.
#
# NOTE(review): the static LTE routes below use preference 50, lower than
# BGP's 100. If BIRD exports only the best route per prefix, 0.0.0.0/0
# from lte_default would not be announced to the CRS while the CRS itself
# advertises a default to us — i.e. only after the CRS withdraws its own
# default. Confirm this matches the intended failover sequence (2000::/3
# is a different prefix from ::/0 and would not be affected the same way).
- name: Write BIRD2 configuration
community.openwrt.copy:
dest: /etc/bird.conf
# Owner read/write, group read, no access for others.
mode: '0640'
owner: root
group: root
content: |
# BIRD2 — LTE failover BGP peer for MikroTik CRS418
# iBGP session, AS 65000, peer: 192.168.6.1 (CRS vlan6)
router id 192.168.6.2;
protocol device {
# Tracks interface up/down state via netlink.
# scan time is a periodic reconciliation fallback; real events are
# netlink-driven and processed immediately.
scan time 5;
}
# Announce directly connected prefixes into BIRD2's RIB so that
# next-hop resolution works for BGP routes received from CRS.
# Without this, 192.168.6.1 (CRS uplink) is unresolvable and all
# IPv4 BGP routes appear unreachable. Same for IPv6 uplink prefix.
protocol direct {
ipv4;
ipv6;
interface "eth0.6";
}
# Install BGP-learned routes from CRS into the kernel at metric 10.
# This is lower than the wwan QMI default (metric 100), so D-Link
# prefers the CRS path for its own outbound traffic when GPON is up.
# import none: BIRD2 does not read the kernel table, preventing
# wwan kernel routes from leaking into BGP.
protocol kernel k4 {
ipv4 {
import none;
export filter {
if proto = "crs" then {
krt_metric = 10;
accept;
}
reject;
};
};
}
protocol kernel k6 {
ipv6 {
import none;
export filter {
if proto = "crs" then {
krt_metric = 10;
accept;
}
reject;
};
};
}
# LTE default routes — exist only while wwan0 is up.
# BIRD2's device protocol tracks wwan0 via netlink; when the interface
# goes down the routes become unreachable and BGP withdraws them.
# Uses interface-name routing (no explicit gateway IP) which is correct
# for QMI raw-ip POINTOPOINT NOARP interfaces.
#
# Preference 50 is below BGP's default of 100 — these routes are only
# used by BIRD2 internally as a presence signal for BGP export, NOT for
# installing into the kernel as our active default route. The kernel
# already gets the wwan default at metric 100 via netifd/qmi.sh, and
# we want the BGP-learned default via CRS (kernel metric 10) to be
# preferred for D-Link's own outbound traffic when GPON is up.
protocol static lte_default {
ipv4 {
preference 50;
};
route 0.0.0.0/0 via "wwan0";
}
protocol static lte_default6 {
ipv6 {
preference 50;
};
route 2000::/3 via "wwan0";
}
protocol bgp crs {
description "MikroTik CRS418 — LTE failover signalling";
local 192.168.6.2 as 65000;
neighbor 192.168.6.1 as 65000;
hold time 30;
keepalive time 10;
ipv4 {
# Import all prefixes CRS announces (VLAN subnets, static routes,
# k8s BGP routes reflected via RR). Installed into kernel via k4.
import all;
# Export only the wwan-sourced LTE default route.
# BGP-learned CRS routes are never re-exported (iBGP split-horizon
# applies; BIRD2 also does not import CRS routes into its RIB from
# the kernel, so they cannot appear here).
export where proto = "lte_default";
};
ipv6 {
# CRS uses Extended Next Hop (RFC 5549) for IPv6 routes, advertising
# them with the IPv4 next-hop 192.168.6.1. The Linux kernel cannot
# install IPv6 routes with IPv4 next-hops. Accept the routes from BGP
# (we negotiated ENHE via "extended next hop yes") but rewrite the
# next-hop in the import filter to the CRS's native IPv6 address on
# vlan6 before they reach the kernel.
extended next hop yes;
import filter {
gw = 2001:470:61a3:600::1;
accept;
};
# Force our own native IPv6 address as the next-hop when advertising
# to CRS, otherwise BIRD2 also uses ENHE and CRS receives a route
# with ::ffff:192.168.6.2 which it can't resolve as an IPv6 next-hop.
export filter {
if proto = "lte_default6" then {
bgp_next_hop = 2001:470:61a3:600::2;
accept;
}
reject;
};
};
}
notify: Reload bird
# Ensures the bird service is running now and is enabled so that it
# also starts at boot.
- name: Enable and start BIRD2 service
  community.openwrt.service:
    name: bird
    state: started
    enabled: true
+12 -2
View File
@@ -20,10 +20,11 @@
# output: ACCEPT (AP itself initiates outbound — opkg, NTP, etc.)
# forward: REJECT (AP does not route client traffic through uplink)
#
# wwan — LTE modem uplink (Orange PL, /dev/cdc-wdm0, disabled by default)
# wwan — LTE modem uplink (Orange PL, /dev/cdc-wdm0, always-on)
# input: REJECT (no inbound from LTE)
# output: ACCEPT (AP itself uses LTE for outbound when uplink unavailable)
# forward: REJECT (no client traffic through LTE)
# forward: REJECT (default; overridden by explicit uplink→wwan forwarding rule)
# masq/masq6: enabled — NAT all traffic exiting via wwan (own + forwarded)
#
# The only inter-zone forwarding rule is uplink → wwan (LTE failover); all other inter-zone policy is on MikroTik.
@@ -75,6 +76,15 @@
option input 'REJECT'
option output 'ACCEPT'
option forward 'REJECT'
option masq '1'
option masq6 '1'
# Forward traffic from MikroTik (arriving on uplink/vlan6) out through wwan
# during LTE failover. MikroTik routes LAN/SRV/IoT traffic here when GPON
# is down and the BGP-learned default via 192.168.6.2 is active.
config forwarding
option src 'uplink'
option dest 'wwan'
config rule
option name 'Allow-ICMPv6-uplink'
+3
View File
@@ -18,6 +18,9 @@
# Static imports, processed in the order listed:
# wwan.yml, then bird.yml, then firewall.yml.
- name: WWAN modem configuration
  ansible.builtin.import_tasks:
    file: wwan.yml

- name: BIRD2 BGP configuration
  ansible.builtin.import_tasks:
    file: bird.yml

- name: Firewall configuration
  ansible.builtin.import_tasks:
    file: firewall.yml
+7 -3
View File
@@ -13,7 +13,7 @@
# mgmt — static 192.168.255.11/24 on eth0.1, management
# lan — bridge (br-lan) on eth0.2, LAN clients via LAN ports
# iot — bridge (br-iot) on eth0.5, IoT clients via wifi only
# uplink — static 192.168.6.2/24 + 2001:470:61a3:600::2/64 on eth0.6, internet access for opkg
# uplink — static 192.168.6.2/24 + 2001:470:61a3:600::2/64 on eth0.6, BGP peer link to CRS (no static gateway — default learned via BIRD2)
# wwan — QMI LTE modem (/dev/cdc-wdm0), Orange PL dual-stack failover (APNs: internet + internetipv6)
# Not auto-started by netifd (option auto '0'); brought up at boot by the wwan-bringup service. Modem-specific quirks handled in wwan.yml.
@@ -156,17 +156,21 @@
option pdptype 'ipv4v6'
option dhcp '0'
option dhcpv6 '0'
option peerdns '0'
option metric '100'
# auto '0': netifd does not bring up wwan at boot. The modem takes
# 30-90s after boot before its QMI service responds, and netifd's
# retry/backoff handles this poorly (failed attempts leave the
# interface in 'pending' state). A separate procd service waits
# for the modem to be ready and triggers ifup wwan once.
option auto '0'
config interface 'uplink'
option device 'eth0.6'
option proto 'static'
option ipaddr '192.168.6.2/24'
option gateway '192.168.6.1'
option dns '192.168.6.1'
option ip6addr '2001:470:61a3:600::2/64'
option ip6gw '2001:470:61a3:600::1'
notify: Reload network
+10
View File
@@ -28,3 +28,13 @@
key: "dropbear.@dropbear[0].authorized_keys"
value: "{{ openwrt_ssh_authorized_keys | join('\n') }}"
when: openwrt_ssh_authorized_keys | length > 0
# The D-Link is a pure AP/relay — no local clients need DNS from it.
# Disable dnsmasq entirely and point the system resolver directly at the
# CRS (192.168.6.1), which is always reachable via vlan6 regardless of
# WAN state and resolves using public upstream servers (1.1.1.1 etc.).
# Stops dnsmasq immediately and disables it so it does not start at boot.
- name: Disable dnsmasq service
  community.openwrt.service:
    name: dnsmasq
    state: stopped
    enabled: false
+135
View File
@@ -103,3 +103,138 @@
community.openwrt.command:
cmd: uqmi -t 3000 -d /dev/cdc-wdm0 --modify-profile 3gpp,2 --apn internetipv6 --pdp-type ipv6
changed_when: false
# On cold boot the BM806C's UIM (SIM) QMI service comes up permanently
# broken: --uim-get-sim-state returns {}, --get-imsi returns
# "UIM uninitialized", AT+CPIN? returns +CME ERROR: SIM busy, and the
# modem never converges (verified at uptime 21 min with no intervention).
# CTL/NAS/WDS do come up after ~5 min of warmup, but UIM does not.
#
# A single USB re-enumeration of the device (authorized=0 / authorized=1)
# forces the modem to redo its internal QMI service init from scratch.
# After this, UIM comes up within ~1 s and ifup wwan succeeds normally.
#
# We use authorized=0/1 rather than usb/unbind+bind because the former
# keeps qmi_wwan in the bound-drivers list and the kernel re-runs its
# bind machinery for us; the latter detaches and re-attaches drivers
# explicitly. Both work; authorized is cleaner.
#
# Full investigation, ruled-out hypotheses, and reproduction steps:
# /root/wwan-diag/boot-wedge-investigation.md on the router.
# Installs the one-shot worker that (1) waits for the modem's QMI device
# node, (2) re-enumerates the modem over USB via the sysfs `authorized`
# flag, (3) runs `ifup wwan`, and (4) adds a non-source-specific IPv6
# default route on wwan0 once qmi.sh has installed its source-specific one.
# The script exits 1 only when the QMI device node never (re)appears;
# every other path exits 0.
- name: Install wwan-bringup worker script
  community.openwrt.copy:
    dest: /usr/libexec/wwan-bringup
    mode: '0755'
    owner: root
    group: root
    content: |
      #!/bin/sh
      # Force-clean BM806C cold-boot UIM wedge by re-enumerating the USB
      # device once, then bring up wwan. Called by /etc/init.d/wwan-bringup
      # as a procd service.

      DEV=/dev/cdc-wdm0
      IFACE=wwan
      USB_PORT=1-1
      USB_AUTH="/sys/bus/usb/devices/$USB_PORT/authorized"

      log() {
        logger -t wwan-bringup "$1"
      }

      # Wait for cold-boot enumeration of cdc-wdm0 (<=60s).
      waited=0
      while [ ! -e "$DEV" ]; do
        sleep 1
        waited=$((waited + 1))
        [ "$waited" -ge 60 ] && break
      done
      if [ ! -e "$DEV" ]; then
        log "$DEV never appeared within 60s; giving up"
        exit 1
      fi

      # Force-clean re-enumeration. The BM806C's UIM QMI service never
      # comes up on cold boot without this.
      log "BM806C cold-boot UIM workaround: re-authorizing $USB_PORT"
      echo 0 > "$USB_AUTH"
      sleep 3
      echo 1 > "$USB_AUTH"

      # Wait for cdc-wdm0 to return after re-enumeration (<=30s).
      waited=0
      while [ ! -e "$DEV" ]; do
        sleep 1
        waited=$((waited + 1))
        [ "$waited" -ge 30 ] && break
      done
      if [ ! -e "$DEV" ]; then
        log "$DEV did not return after re-auth; giving up"
        exit 1
      fi

      # qmi.sh's own SIM-init and network-registration loops handle the
      # small remaining warmup (~5-30s) gracefully now that UIM is healthy.
      log "bringing up $IFACE"
      ifup "$IFACE"

      # qmi.sh installs an IPv6 default route with a source-specific prefix
      # constraint (`default from 2a00:f44:.../64 ...`). This means only
      # traffic sourced from the wwan IPv6 prefix uses it — forwarded traffic
      # from internal subnets fails routing lookup with "net unreachable"
      # before masquerade can rewrite the source. Add a non-source-specific
      # default at a higher metric so forwarded traffic has a valid route,
      # gets routed out wwan0, then masqueraded by fw4.
      #
      # Wait up to 90s for qmi.sh to install its source-specific default,
      # then derive the gateway and add a regular default route.
      waited=0
      gw6=
      while [ "$waited" -lt 90 ]; do
        gw6=$(ip -6 route show default dev wwan0 2>/dev/null | awk '/^default from/ {print $5; exit}')
        if [ -n "$gw6" ]; then
          if ip -6 route show default dev wwan0 | grep -qE "^default via "; then
            log "non-source-specific IPv6 default already present"
          else
            log "adding non-source-specific IPv6 default via $gw6"
            ip -6 route add default via "$gw6" dev wwan0 metric 1024 \
              || log "warning: failed to add non-source-specific IPv6 default via $gw6"
          fi
          break
        fi
        sleep 3
        waited=$((waited + 3))
      done

      # Deliberately an if-block: a trailing `[ -z "$gw6" ] && log ...`
      # would make the script exit 1 whenever the gateway WAS found,
      # because the failed test becomes the script's exit status.
      if [ -z "$gw6" ]; then
        log "warning: wwan IPv6 gateway never appeared, skipping default route"
      fi
      exit 0
# Installs the procd init script that launches /usr/libexec/wwan-bringup
# late in boot (START=99).
- name: Install wwan-bringup init script
  community.openwrt.copy:
    dest: /etc/init.d/wwan-bringup
    mode: '0755'
    owner: root
    group: root
    content: |
      #!/bin/sh /etc/rc.common
      # Starts the wwan-bringup worker which re-enumerates the BM806C USB
      # device once to clear the cold-boot UIM wedge, then triggers
      # `ifup wwan`. See /usr/libexec/wwan-bringup.

      START=99
      USE_PROCD=1

      # One-shot worker registered as a regular procd instance. procd only
      # restarts an exited instance when the `respawn` parameter is set, so
      # leaving it out gives run-once behaviour while procd still tracks
      # the process and terminates it on `stop` — no hand-rolled PID file
      # that could go stale and point at an unrelated process.
      start_service() {
        procd_open_instance
        procd_set_param command /usr/libexec/wwan-bringup
        # Forward anything the worker prints on stderr to the system log.
        procd_set_param stderr 1
        procd_close_instance
      }
# Enables the wwan-bringup init script for boot and starts it now.
# NOTE(review): the worker is one-shot and exits when done. If the service
# module then reports it as "not running", every playbook run would start
# it again and re-enumerate the modem — confirm whether that is acceptable.
- name: Enable and start wwan-bringup service
  community.openwrt.service:
    name: wwan-bringup
    state: started
    enabled: true
+2 -2
View File
@@ -51,10 +51,10 @@
data:
- interface: pppoe-gpon
list: wan
- interface: lte1
list: wan
- interface: sit1
list: wan
- interface: vlan6
list: wan
handle_absent_entries: remove
handle_entries_content: remove_as_much_as_possible
+15 -11
View File
@@ -10,11 +10,6 @@
chain: forward
comment: Allow all already established connections
connection-state: established,related
- action: accept
chain: forward
comment: Allow LTE modem management (next rule forbids it otherwise)
dst-address: 192.168.8.1
out-interface: lte1
- action: reject
chain: forward
comment: Forbid forwarding 192.168.0.0/16 to WAN
@@ -173,7 +168,13 @@
comment: Allow BGP from SRV
dst-port: 179
in-interface: vlan4
protocol: udp
protocol: tcp
- action: accept
chain: input
comment: Allow BGP from OPENWRT UPLINK
dst-port: 179
in-interface: vlan6
protocol: tcp
- action: accept
chain: input
comment: NAT-PMP from LAN
@@ -243,15 +244,11 @@
- action: masquerade
chain: srcnat
comment: Masquerade to internet
out-interface-list: wan
out-interface: pppoe-gpon
- action: masquerade
chain: srcnat
comment: GPON ONT management
dst-address: 192.168.100.1
- action: masquerade
chain: srcnat
comment: LTE Modem management
dst-address: 192.168.8.1
- action: dst-nat
chain: dstnat
comment: TS3
@@ -516,6 +513,13 @@
in-interface: vlan4
protocol: tcp
src-address: 2001:470:61a3:100::/64
- action: accept
chain: input
comment: Allow BGP from OPENWRT UPLINK
dst-port: 179
in-interface: vlan6
protocol: tcp
src-address: 2001:470:61a3:600::/64
- action: reject
chain: input
comment: Reject all remaining
-33
View File
@@ -39,39 +39,6 @@
loop_control:
label: "{{ item.default_name }}"
- name: Configure LTE interface defaults
community.routeros.api_find_and_modify:
ignore_dynamic: false
path: interface lte
find:
default-name: lte1
values:
apn-profiles: default-nodns
comment: Backup LTE WAN
- name: Configure LTE APN profiles
community.routeros.api_modify:
path: interface lte apn
data:
- add-default-route: false
apn: internet
comment: default but without dns and default route
ipv6-interface: lte1
name: default-nodns
use-network-apn: true
use-peer-dns: false
# Default APN we can't really remove yet I don't want to reconfigure it
- add-default-route: true
apn: internet
authentication: none
default-route-distance: 2
ip-type: auto
name: default
use-network-apn: true
use-peer-dns: true
handle_absent_entries: remove
handle_entries_content: remove_as_much_as_possible
- name: Configure temporary disk for containers
community.routeros.api_modify:
path: disk
+22 -9
View File
@@ -21,15 +21,6 @@
suppress-hw-offload: false
target-scope: 10
vrf-interface: pppoe-gpon
- disabled: false
distance: 2
dst-address: 0.0.0.0/0
gateway: 192.168.8.1
routing-table: main
scope: 30
suppress-hw-offload: false
target-scope: 10
vrf-interface: lte1
handle_absent_entries: remove
handle_entries_content: remove_as_much_as_possible
@@ -93,5 +84,27 @@
remote.address: 2001:470:61a3:100::3/128
routing-table: main
templates: klaster
- name: dlink-lte
afi: ip,ipv6
as: 65000
connect: true
disabled: false
instance: bgp-homelab
listen: true
# ibgp-rr: CRS acts as route reflector for D-Link (the RR client).
# This allows k8s routes learned from bgp1 to be reflected to D-Link
# without violating iBGP split-horizon.
local.role: ibgp-rr
remote.address: 192.168.6.2/32
routing-table: main
templates: klaster
hold-time: 30s
keepalive-time: 10s
# Redistribute connected (VLAN addresses) and static routes (Tailscale,
# GPON default) so D-Link has explicit routes to all internal subnets
# and a default route when GPON is up.
output.redistribute: connected,static
output.default-originate: if-installed
nexthop-choice: force-self
handle_absent_entries: remove
handle_entries_content: remove_as_much_as_possible