package agent import ( "errors" "fmt" "log/slog" "net" "os" "os/exec" "path/filepath" "sync" "time" flockv1alpha1 "code.fritzlab.net/fritzlab/flock/pkg/api/v1alpha1" "code.fritzlab.net/fritzlab/flock/pkg/routing/bird" ) // BirdManager renders bird.conf and triggers birdc reload. Writes are // debounced so a burst of NodeConfig / anycast changes coalesces. type BirdManager struct { NodeName string ConfigPath string // /etc/flock/bird/bird.conf BirdcSocket string // /run/flock/bird6.ctl (BIRD2 single-socket default) BirdctlPath string // "birdc" — overridable for tests Logger *slog.Logger mu sync.Mutex last string // last rendered output (de-dup) cooldown *time.Timer cooldownEnd time.Time // window during which further reloads are coalesced pending bool // a render landed during cooldown; reload at window end } // reloadCooldown is the minimum spacing between two birdc reloads. The // first change fires immediately (no leading-edge delay); follow-up // changes within this window are coalesced into a single tail reload. const reloadCooldown = 500 * time.Millisecond // Render writes the config from a NodeConfig + anycast set. Idempotent — // if the rendered content matches what we last wrote, no birdc reload. func (b *BirdManager) Render(nc *flockv1alpha1.NodeConfig, anycast6, anycast4 []string, routerID string) error { if nc == nil { return fmt.Errorf("no NodeConfig") } in := bird.NodeBGP{ NodeName: b.NodeName, RouterID: routerID, LocalASN: nc.Spec.BGP.ASN, CIDR6: nc.Spec.CIDR6, CIDR4: nc.Spec.CIDR4, Anycast6: anycast6, Anycast4: anycast4, } // Pick a local source address per family that's on the same subnet as // the BGP peer. crt001 rejects IPv6 advertisements whose next-hop is // link-local-only; an explicit `source address` makes BIRD use a // global next-hop self, which Cisco accepts. // // Also derive the connected subnet (peer IP masked to /64 v6 / /24 v4) // per family. 
Render uses it to install `import where net != ` // on the BGP channel so the gateway can't readvertise our own connected // /64 back to us — accepting it would override the kernel route and // hairpin all inter-host traffic via the gateway. for _, p := range nc.Spec.BGP.Peers { fam := bird.FamilyOf(p.Address) if fam == "" { continue } in.Peers = append(in.Peers, bird.Peer{Family: fam, Address: p.Address, ASN: p.ASN}) if local := localAddrSameSubnet(p.Address); local != "" { if fam == "v6" && in.LocalV6 == "" { in.LocalV6 = local } if fam == "v4" && in.LocalV4 == "" { in.LocalV4 = local } } if subnet := peerSubnet(p.Address); subnet != "" { if fam == "v6" && in.LocalSubnetV6 == "" { in.LocalSubnetV6 = subnet } if fam == "v4" && in.LocalSubnetV4 == "" { in.LocalSubnetV4 = subnet } } } cfg, err := bird.Render(in) if err != nil { return err } b.mu.Lock() defer b.mu.Unlock() if cfg == b.last { return nil } if err := os.MkdirAll(filepath.Dir(b.ConfigPath), 0o755); err != nil { return fmt.Errorf("mkdir bird config dir: %w", err) } tmp := b.ConfigPath + ".tmp" if err := os.WriteFile(tmp, []byte(cfg), 0o644); err != nil { return fmt.Errorf("write bird.conf: %w", err) } if err := os.Rename(tmp, b.ConfigPath); err != nil { return fmt.Errorf("rename bird.conf: %w", err) } b.last = cfg b.scheduleReload() return nil } // scheduleReload uses leading-edge + cooldown semantics: the first call // reloads immediately; subsequent calls within reloadCooldown coalesce // into a single deferred reload at the cooldown's end. Caller holds b.mu. func (b *BirdManager) scheduleReload() { now := time.Now() if now.After(b.cooldownEnd) { // Outside any active cooldown — fire now (leading edge). b.cooldownEnd = now.Add(reloadCooldown) b.pending = false go b.reload() return } // Inside cooldown — coalesce. If no tail timer is set, schedule one // at the cooldown end; if already set, just leave it. 
if b.pending { return } b.pending = true delay := b.cooldownEnd.Sub(now) b.cooldown = time.AfterFunc(delay, func() { b.mu.Lock() b.pending = false b.cooldownEnd = time.Now().Add(reloadCooldown) b.mu.Unlock() b.reload() }) } func (b *BirdManager) reload() { birdctl := b.BirdctlPath if birdctl == "" { birdctl = "birdc" } socket := b.BirdcSocket if socket == "" { socket = "/run/flock/bird.ctl" } cmd := exec.Command(birdctl, "-s", socket, "configure") out, err := cmd.CombinedOutput() if err != nil { // First-run case: bird may not be ready yet — retry on next change. if errors.Is(err, exec.ErrNotFound) || os.IsNotExist(err) { b.Logger.Warn("birdc not available", "err", err) return } b.Logger.Warn("birdc reload failed", "err", err, "out", string(out)) return } b.Logger.Info("birdc configure ok", "out", string(out)) } // SummaryRoutes installs blackhole kernel routes for each NodeConfig CIDR. // BIRD's protocol kernel imports them so they get advertised. Idempotent. func (b *BirdManager) SummaryRoutes(nc *flockv1alpha1.NodeConfig) error { if nc == nil { return nil } for _, c := range nc.Spec.CIDR6 { if err := installBlackhole(c); err != nil { b.Logger.Warn("blackhole route v6", "cidr", c, "err", err) } } for _, c := range nc.Spec.CIDR4 { if err := installBlackhole(c); err != nil { b.Logger.Warn("blackhole route v4", "cidr", c, "err", err) } } return nil } // peerSubnet returns the canonical CIDR of the assumed connected subnet // containing `peer` — /64 for IPv6, /24 for IPv4. Returns "" if peer // doesn't parse. Matches the assumption already baked into // localAddrSameSubnet: fritzlab convention is /64 v6 and /24 v4. 
func peerSubnet(peer string) string {
	pip := net.ParseIP(peer)
	if pip == nil {
		return ""
	}
	return assumedPeerNet(pip).String()
}

// assumedPeerNet returns the network assumed to contain pip: a /24 when pip
// is an IPv4 address and a /64 otherwise. The returned IP is masked, so its
// String form is the canonical CIDR. pip must be a successfully parsed IP.
func assumedPeerNet(pip net.IP) *net.IPNet {
	mask := net.CIDRMask(64, 128)
	if pip.To4() != nil {
		mask = net.CIDRMask(24, 32)
	}
	return &net.IPNet{IP: pip.Mask(mask), Mask: mask}
}

// localAddrSameSubnet finds an IP on a local interface that's in the same
// /64 (v6) or /24 (v4) as `peer`. Returns "" if none, if peer doesn't parse,
// or if the interface addresses cannot be listed. Used to derive the
// `source address` for a BGP session.
func localAddrSameSubnet(peer string) string {
	pip := net.ParseIP(peer)
	if pip == nil {
		return ""
	}
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return ""
	}

	// Both values depend only on the peer, so compute them once up front.
	v4 := pip.To4() != nil
	subnet := assumedPeerNet(pip)

	for _, a := range addrs {
		ipn, ok := a.(*net.IPNet)
		if !ok {
			continue
		}
		ip := ipn.IP
		if ip.IsLoopback() || ip.IsLinkLocalUnicast() {
			continue
		}
		// Only consider addresses of the peer's own family.
		if (ip.To4() != nil) != v4 {
			continue
		}
		// Membership uses the peer's assumed mask, not the interface's own.
		if !subnet.Contains(ip) {
			continue
		}
		if v4 {
			return ip.To4().String()
		}
		return ip.To16().String()
	}
	return ""
}

// installBlackhole installs (or replaces) a blackhole route for cidr by
// running `ip route replace blackhole <cidr>`. It returns an error when cidr
// is not valid CIDR notation or when the command fails; the command's
// combined output is included in the latter error.
func installBlackhole(cidr string) error {
	// Use `ip` rather than netlink so this file stays portable for non-Linux
	// builds (the agent on macOS just no-ops). The agent only runs in
	// Kubernetes pods on Linux nodes, so the exec is fine.
	if _, _, err := net.ParseCIDR(cidr); err != nil {
		// Reject malformed input before handing it to an external command.
		return fmt.Errorf("parse cidr %q: %w", cidr, err)
	}
	out, err := exec.Command("ip", "route", "replace", "blackhole", cidr).CombinedOutput()
	if err != nil {
		return fmt.Errorf("ip route replace blackhole %s: %w (%s)", cidr, err, string(out))
	}
	return nil
}