Files
Donavan Fritz 9b777ca7d1
Build flock Image / build (push) Successful in 2m17s
bird: per-peer import filter rejects connected subnet
Without a filter, crt001's `network 2602:817:3000:A25::/64` gets
re-advertised to every peer on that subnet. bird installs the BGP /64
with metric 32, beating the kernel-connected route at 256, and all
inter-host VLAN-25 traffic hairpins through the gateway — losing PMTU
9000 and ~30x throughput. Broke Plex 2026-05-04: NFS to nas002 capped
at 7 MB/s, jumbo blackholed.

Add LocalSubnetV6/V4 (CIDR) to NodeBGP. Agent populates by masking the
peer's address to /64 (v6) or /24 (v4) — same fritzlab convention
already in localAddrSameSubnet. Render emits `import where net !=
<subnet>;` per BGP channel when set, falls back to `import all;`
otherwise so existing tests stay green.

Defence in depth: with the matching outbound route-map on crt001
(ROUTE_MAP_CLUSTER_OUT_V{4,6}) the agent now refuses the leak on its
own if the router filter ever drifts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 21:03:59 -05:00

259 lines
7.3 KiB
Go

package agent
import (
"errors"
"fmt"
"log/slog"
"net"
"os"
"os/exec"
"path/filepath"
"sync"
"time"
flockv1alpha1 "code.fritzlab.net/fritzlab/flock/pkg/api/v1alpha1"
"code.fritzlab.net/fritzlab/flock/pkg/routing/bird"
)
// BirdManager renders bird.conf and triggers birdc reload. Writes are
// debounced so a burst of NodeConfig / anycast changes coalesces.
//
// The exported fields are configuration set by the owner; the unexported
// fields are bookkeeping that Render and the reload timer touch only while
// holding mu.
type BirdManager struct {
	// NodeName is copied into the rendered BGP input by Render.
	NodeName string
	// ConfigPath is the file Render writes, e.g. /etc/flock/bird/bird.conf.
	// Its parent directory is created on demand.
	ConfigPath string
	// BirdcSocket is the control socket handed to birdc via -s, e.g.
	// /run/flock/bird6.ctl (BIRD2 single-socket default).
	// NOTE(review): when this is empty, reload falls back to
	// /run/flock/bird.ctl, which differs from the path named here — confirm
	// which one is current.
	BirdcSocket string
	// BirdctlPath is the birdc executable; reload uses "birdc" when it is
	// empty. Overridable for tests.
	BirdctlPath string
	// Logger receives reload and blackhole-route messages. It is called
	// without a nil check, so it must be set.
	Logger *slog.Logger
	// mu guards every field below.
	mu sync.Mutex
	// last is the most recently written config; Render skips the write and
	// reload when the new output is identical (de-dup).
	last string
	// cooldown is the timer armed by scheduleReload for the tail reload.
	cooldown *time.Timer
	// cooldownEnd is the end of the window during which further reloads are
	// coalesced.
	cooldownEnd time.Time
	// pending records that a render landed during cooldown, so a reload is
	// due at window end.
	pending bool
}
// reloadCooldown is the minimum spacing between two birdc reloads, as
// enforced by scheduleReload. The first change fires immediately (no
// leading-edge delay); follow-up changes within this window are coalesced
// into a single tail reload at the window's end.
const reloadCooldown = 500 * time.Millisecond
// Render builds the BIRD configuration for nc plus the given anycast
// prefixes, writes it to b.ConfigPath and schedules a birdc reload.
//
// The call is idempotent: when the rendered text equals what was written
// last, nothing is written and no reload is scheduled. The file is replaced
// atomically by writing ConfigPath+".tmp" and renaming it into place.
//
// It returns an error when nc is nil, when rendering fails, or when the
// directory / file operations fail. Reload failures are not reported here;
// the reload runs asynchronously.
func (b *BirdManager) Render(nc *flockv1alpha1.NodeConfig, anycast6, anycast4 []string, routerID string) error {
	if nc == nil {
		return fmt.Errorf("no NodeConfig")
	}

	in := bird.NodeBGP{
		NodeName: b.NodeName,
		RouterID: routerID,
		LocalASN: nc.Spec.BGP.ASN,
		CIDR6:    nc.Spec.CIDR6,
		CIDR4:    nc.Spec.CIDR4,
		Anycast6: anycast6,
		Anycast4: anycast4,
	}

	// For each address family, the first peer that yields a value decides:
	//
	//   - the local source address on the peer's subnet. crt001 rejects IPv6
	//     advertisements whose next-hop is link-local-only; an explicit
	//     `source address` makes BIRD use a global next-hop self, which
	//     Cisco accepts.
	//   - the connected subnet (peer masked to /64 v6 or /24 v4). The
	//     renderer uses it for `import where net != <subnet>` so the gateway
	//     cannot re-advertise our own connected prefix back to us; accepting
	//     it would override the kernel route and hairpin inter-host traffic
	//     through the gateway.
	for _, peer := range nc.Spec.BGP.Peers {
		family := bird.FamilyOf(peer.Address)
		if family == "" {
			// Unrecognised address family: the peer is left out entirely.
			continue
		}
		in.Peers = append(in.Peers, bird.Peer{Family: family, Address: peer.Address, ASN: peer.ASN})

		source := localAddrSameSubnet(peer.Address)
		connected := peerSubnet(peer.Address)

		switch family {
		case "v6":
			if source != "" && in.LocalV6 == "" {
				in.LocalV6 = source
			}
			if connected != "" && in.LocalSubnetV6 == "" {
				in.LocalSubnetV6 = connected
			}
		case "v4":
			if source != "" && in.LocalV4 == "" {
				in.LocalV4 = source
			}
			if connected != "" && in.LocalSubnetV4 == "" {
				in.LocalSubnetV4 = connected
			}
		}
	}

	rendered, err := bird.Render(in)
	if err != nil {
		return err
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	// Unchanged output: skip both the write and the reload.
	if rendered == b.last {
		return nil
	}

	dir := filepath.Dir(b.ConfigPath)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return fmt.Errorf("mkdir bird config dir: %w", err)
	}

	// Write to a sibling temp file and rename it, so BIRD never reads a
	// half-written config.
	staging := b.ConfigPath + ".tmp"
	if err := os.WriteFile(staging, []byte(rendered), 0o644); err != nil {
		return fmt.Errorf("write bird.conf: %w", err)
	}
	if err := os.Rename(staging, b.ConfigPath); err != nil {
		return fmt.Errorf("rename bird.conf: %w", err)
	}

	b.last = rendered
	b.scheduleReload()
	return nil
}
// scheduleReload requests a birdc reload with leading-edge + cooldown
// semantics. Caller holds b.mu.
//
//   - Outside a cooldown window the reload starts immediately in its own
//     goroutine and a new window of reloadCooldown begins.
//   - Inside a window the request is coalesced: the first such call arms a
//     timer for the window's end; later calls return early because
//     b.pending is already set.
func (b *BirdManager) scheduleReload() {
	now := time.Now()
	if now.After(b.cooldownEnd) {
		// Leading edge. A tail timer from the previous window may be due but
		// not yet run, or may already be blocked on b.mu (which we hold).
		// Stop covers the first case; clearing pending covers the second,
		// because the callback skips its reload when pending is false.
		// Without this the stale timer would issue a redundant reload right
		// after the one started here.
		if b.cooldown != nil {
			b.cooldown.Stop()
		}
		b.cooldownEnd = now.Add(reloadCooldown)
		b.pending = false
		go b.reload()
		return
	}

	// Inside cooldown — coalesce. If a tail timer is already armed, leave it.
	if b.pending {
		return
	}
	b.pending = true
	b.cooldown = time.AfterFunc(b.cooldownEnd.Sub(now), func() {
		b.mu.Lock()
		if !b.pending {
			// A leading-edge reload, or another tail timer, already covered
			// the change this timer was armed for.
			b.mu.Unlock()
			return
		}
		b.pending = false
		b.cooldownEnd = time.Now().Add(reloadCooldown)
		b.mu.Unlock()
		// Run birdc outside the lock so Render is not blocked on it.
		b.reload()
	})
}
// reload runs `birdc -s <socket> configure` so BIRD re-reads the config
// file. BirdctlPath defaults to "birdc" and BirdcSocket to
// "/run/flock/bird.ctl" when unset.
//
// Failures are logged at warn level and otherwise ignored; nothing is
// returned, and the next config change triggers another attempt.
func (b *BirdManager) reload() {
	birdctl := b.BirdctlPath
	if birdctl == "" {
		birdctl = "birdc"
	}
	socket := b.BirdcSocket
	if socket == "" {
		socket = "/run/flock/bird.ctl"
	}

	out, err := exec.Command(birdctl, "-s", socket, "configure").CombinedOutput()
	switch {
	case err == nil:
		b.Logger.Info("birdc configure ok", "out", string(out))
	case errors.Is(err, exec.ErrNotFound) || errors.Is(err, os.ErrNotExist):
		// The birdc executable could not be found or started. errors.Is
		// walks the whole wrap chain, unlike the legacy os.IsNotExist.
		b.Logger.Warn("birdc not available", "err", err)
	default:
		// birdc ran but failed (e.g. non-zero exit); include its output.
		b.Logger.Warn("birdc reload failed", "err", err, "out", string(out))
	}
}
// SummaryRoutes installs a blackhole kernel route for every CIDR6 and CIDR4
// entry of nc, so that BIRD's kernel protocol can import and advertise them.
// Safe to call repeatedly: routes are installed with `ip route replace`.
//
// This is best effort. A route that fails to install is logged at warn level
// and skipped, and the method always returns nil. A nil nc is a no-op.
func (b *BirdManager) SummaryRoutes(nc *flockv1alpha1.NodeConfig) error {
	if nc == nil {
		return nil
	}

	// install adds one route and logs, rather than propagates, any failure.
	install := func(logMsg, cidr string) {
		if err := installBlackhole(cidr); err != nil {
			b.Logger.Warn(logMsg, "cidr", cidr, "err", err)
		}
	}

	for _, cidr := range nc.Spec.CIDR6 {
		install("blackhole route v6", cidr)
	}
	for _, cidr := range nc.Spec.CIDR4 {
		install("blackhole route v4", cidr)
	}
	return nil
}
// peerSubnet returns, in canonical CIDR notation, the subnet assumed to
// contain peer: the enclosing /64 for an IPv6 address, the enclosing /24 for
// an IPv4 address (including IPv4-mapped IPv6 input). It returns "" when peer
// is not a valid IP literal.
//
// The fixed prefix lengths are the same fritzlab convention that
// localAddrSameSubnet relies on.
func peerSubnet(peer string) string {
	addr := net.ParseIP(peer)
	if addr == nil {
		return ""
	}

	// Default to the IPv6 convention; narrow to IPv4 when the address has a
	// 4-byte representation.
	ones, bits := 64, 128
	if addr.To4() != nil {
		ones, bits = 24, 32
	}

	mask := net.CIDRMask(ones, bits)
	subnet := net.IPNet{IP: addr.Mask(mask), Mask: mask}
	return subnet.String()
}
// localAddrSameSubnet returns the first address configured on a local
// interface that shares peer's assumed subnet — the same /64 for IPv6, the
// same /24 for IPv4. Loopback and link-local addresses are never returned.
//
// The result is "" when peer is not a valid IP literal, when the interface
// addresses cannot be listed, or when no address matches. Used to derive the
// `source address` for a BGP session.
func localAddrSameSubnet(peer string) string {
	target := net.ParseIP(peer)
	if target == nil {
		return ""
	}
	ifAddrs, err := net.InterfaceAddrs()
	if err != nil {
		return ""
	}

	// The subnet depends only on peer, so build it once. The real prefix
	// length of the peer is unknown; the /64 and /24 conventions stand in.
	wantV4 := target.To4() != nil
	ones, bits := 64, 128
	if wantV4 {
		ones, bits = 24, 32
	}
	subnet := net.IPNet{IP: target, Mask: net.CIDRMask(ones, bits)}

	for _, ifAddr := range ifAddrs {
		ipNet, isIPNet := ifAddr.(*net.IPNet)
		if !isIPNet {
			continue
		}
		candidate := ipNet.IP
		switch {
		case candidate.IsLoopback(), candidate.IsLinkLocalUnicast():
			// Not usable as a BGP source address.
			continue
		case (candidate.To4() != nil) != wantV4:
			// Wrong address family.
			continue
		case !subnet.Contains(candidate):
			continue
		}
		if wantV4 {
			return candidate.To4().String()
		}
		return candidate.To16().String()
	}
	return ""
}
// installBlackhole installs, or replaces, a blackhole kernel route for cidr
// by running `ip route replace blackhole <cidr>`.
//
// cidr is validated with net.ParseCIDR first; an invalid value is returned as
// that parse error and nothing is executed. A failing `ip` command is
// returned wrapped, with its combined output appended.
func installBlackhole(cidr string) error {
	if _, _, parseErr := net.ParseCIDR(cidr); parseErr != nil {
		return parseErr
	}

	// Shell out to `ip` rather than using netlink so this file stays
	// buildable on non-Linux hosts. The agent only runs in Kubernetes pods
	// on Linux nodes, so the exec is fine.
	output, runErr := exec.Command("ip", "route", "replace", "blackhole", cidr).CombinedOutput()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("ip route replace blackhole %s: %w (%s)", cidr, runErr, string(output))
}