//go:build linux package agent import ( "context" "errors" "fmt" "log/slog" "net" "sync" "syscall" "time" flockv1alpha1 "code.fritzlab.net/fritzlab/flock/pkg/api/v1alpha1" "github.com/vishvananda/netlink" ) // AnycastReconciler keeps the kernel's anycast host routes and BIRD's // advertised set in sync with (committed allocations × pod Ready). // // Lifecycle (per design doc): // - CNI ADD assigns anycast IPs to pod lo (already done in netns_linux.go). // - Pod transitions to Ready=True → install host /128 (or /32) route at // `dev flock<8hex>` and add the IP to BIRD's export filter. // - Pod transitions to Ready=False or DELETE → remove kernel route, remove // from BIRD export. // // Reconcile is idempotent. Triggers: AfterCommit hook, Pod informer // UpdateFunc on Ready transitions, periodic 2s tick. type AnycastReconciler struct { Node string Store *Store Pods *PodCache NodeConfig *NodeConfigCache Bird *BirdManager RouterID string Logger *slog.Logger mu sync.Mutex advertised map[string]anycastTarget // canonical IP → install info trigger chan struct{} } // anycastTarget describes the kernel route shape for one advertised // anycast IP: which veth, and which pod eth0 IP to use as next-hop. type anycastTarget struct { hostIface string via net.IP } // NewAnycastReconciler returns a Reconciler ready to Run. func NewAnycastReconciler(node string, store *Store, pods *PodCache, nc *NodeConfigCache, bird *BirdManager, routerID string, logger *slog.Logger) *AnycastReconciler { return &AnycastReconciler{ Node: node, Store: store, Pods: pods, NodeConfig: nc, Bird: bird, RouterID: routerID, Logger: logger, advertised: map[string]anycastTarget{}, trigger: make(chan struct{}, 1), } } // Trigger requests one reconcile pass. Coalesces — if a pass is already // pending, the call is a no-op. func (r *AnycastReconciler) Trigger() { select { case r.trigger <- struct{}{}: default: } } // Run blocks until ctx is cancelled. Reconciles on Trigger or every 2s. func (r *AnycastReconciler) Run(ctx context.Context) { t := time.NewTicker(2 * time.Second) defer t.Stop() r.reconcile() // initial pass for { select { case <-ctx.Done(): return case <-t.C: r.reconcile() case <-r.trigger: r.reconcile() } } } func (r *AnycastReconciler) reconcile() { r.mu.Lock() defer r.mu.Unlock() desired := r.computeDesired() // Install routes that should exist but don't (or whose target changed). for ip, t := range desired { if cur, ok := r.advertised[ip]; ok && cur.hostIface == t.hostIface && cur.via.Equal(t.via) { continue } if err := installAnycastRoute(ip, t); err != nil { r.Logger.Warn("anycast install", "ip", ip, "host", t.hostIface, "via", t.via, "err", err) continue } r.Logger.Info("anycast advertise", "ip", ip, "host", t.hostIface, "via", t.via) r.advertised[ip] = t } // Remove routes that exist but shouldn't. for ip, t := range r.advertised { if _, want := desired[ip]; !want { if err := removeAnycastRoute(ip, t); err != nil { r.Logger.Warn("anycast remove", "ip", ip, "host", t.hostIface, "err", err) } else { r.Logger.Info("anycast withdraw", "ip", ip, "host", t.hostIface) } delete(r.advertised, ip) } } // Re-render BIRD with the active set. r.renderBird(desired) } // computeDesired walks the Store and returns the per-ip anycastTarget for // every anycast advertisement that should be active right now. Each target // uses the pod's own eth0 IP (same family) as the route's `via` next-hop — // that way kernel NDP/ARP resolves the eth0 address, which IS configured // on the pod's eth0, so the pod responds normally without proxy_ndp. func (r *AnycastReconciler) computeDesired() map[string]anycastTarget { out := map[string]anycastTarget{} for _, a := range r.Store.Snapshot() { if a.State != StateCommitted || len(a.Anycast) == 0 { continue } pod, ok := r.Pods.Get(a.Namespace, a.PodName) if !ok || !podReady(pod) { continue } host := HostIfaceName(a.ContainerID) via6 := net.ParseIP(a.IP6) via4 := net.ParseIP(a.IP4) for _, ipStr := range a.Anycast { ip := net.ParseIP(ipStr) if ip == nil { continue } var via net.IP if ip.To4() != nil { via = via4 } else { via = via6 } if via == nil { r.Logger.Warn("anycast skipped: pod has no unicast IP of same family", "pod", a.Namespace+"/"+a.PodName, "anycast", ipStr) continue } out[canonical(ip)] = anycastTarget{hostIface: host, via: via} } } return out } func (r *AnycastReconciler) renderBird(desired map[string]anycastTarget) { nc := r.NodeConfig.Load() if nc == nil || r.Bird == nil { return } var v6, v4 []string for ipStr := range desired { ip := net.ParseIP(ipStr) if ip == nil { continue } if ip.To4() != nil { v4 = append(v4, ip.To4().String()) } else { v6 = append(v6, ip.To16().String()) } } if err := r.Bird.Render(nc, v6, v4, r.RouterID); err != nil { r.Logger.Warn("anycast bird render", "err", err) } } // installAnycastRoute installs `/<128|32> via t.via dev t.hostIface`. // Idempotent — RouteReplace overwrites a stale entry. func installAnycastRoute(ipStr string, t anycastTarget) error { ip := net.ParseIP(ipStr) if ip == nil { return fmt.Errorf("bad ip %q", ipStr) } link, err := netlink.LinkByName(t.hostIface) if err != nil { return fmt.Errorf("lookup %s: %w", t.hostIface, err) } prefix := 128 if ip.To4() != nil { prefix = 32 ip = ip.To4() } r := &netlink.Route{ LinkIndex: link.Attrs().Index, Dst: cidrFor(ip, prefix), Gw: t.via, // SCOPE_UNIVERSE — the gateway is on a different "logical" subnet // than the local /128 route, but reachable on this veth. Linux is // happy as long as the veth has IPv6 forwarding on (it does — set // in configureHostSide) and the pod's eth0 has the via address // (also true — that's the pod's IP6/IP4 we allocated). } return netlink.RouteReplace(r) } // removeAnycastRoute deletes the host route. Missing routes / interfaces // are treated as success — DEL paths can race with veth teardown. func removeAnycastRoute(ipStr string, t anycastTarget) error { ip := net.ParseIP(ipStr) if ip == nil { return nil } link, err := netlink.LinkByName(t.hostIface) if err != nil { return nil } prefix := 128 if ip.To4() != nil { prefix = 32 ip = ip.To4() } r := &netlink.Route{ LinkIndex: link.Attrs().Index, Dst: cidrFor(ip, prefix), Gw: t.via, } if err := netlink.RouteDel(r); err != nil { // ESRCH ("no such process") is netlink-speak for "no such route"; // treat as success. if errors.Is(err, syscall.ESRCH) || linkNotFound(err) { return nil } return err } return nil } // _ = flockv1alpha1 to silence unused import warnings on minimal builds. var _ = flockv1alpha1.GroupName