//go:build linux package agent import ( "context" "fmt" "net" "time" ) // configureRuntime wires Pod informer, IPAM, netlink, and BIRD on a real // Linux node. Steps: // // 1. Wait for NodeConfig (operator-applied per-node CR). // 2. Reconcile any pre-existing kernel state from allocations.json into // IPAM.used (so we never re-allocate an in-flight pod's IP). // 3. Garbage-collect any state==pending entries (partial ADDs from a // previous agent generation). // 4. Start the Pod informer (filtered to spec.nodeName == node). // 5. Build PodHandler and SetHandlers(add, del, check). // 6. Install BIRD blackhole summary routes + render initial config. func (s *Server) configureRuntime(ctx context.Context) error { if err := s.firstAvailableNodeConfig(ctx, 60*time.Second); err != nil { return err } nc := s.NodeConfig.Load() ipam, err := NewIPAM(nc.Spec.CIDR6, nc.Spec.CIDR4) if err != nil { return fmt.Errorf("init ipam: %w", err) } // Reconcile committed entries; GC pending entries. for _, a := range s.Store.Snapshot() { switch a.State { case StateCommitted: if a.IP6 != "" { ipam.MarkInUse(net.ParseIP(a.IP6)) } if a.IP4 != "" { ipam.MarkInUse(net.ParseIP(a.IP4)) } case StatePending: s.Logger.Info("GC pending allocation", "container_id", a.ContainerID) _ = Teardown(a.ContainerID, net.ParseIP(a.IP6), net.ParseIP(a.IP4)) _ = s.Store.Delete(a.ContainerID) } } pods, err := StartPodInformer(ctx, s.restCfg, s.Node, s.Logger) if err != nil { return fmt.Errorf("pod informer: %w", err) } bird := &BirdManager{ NodeName: s.Node, ConfigPath: "/etc/flock/bird/bird.conf", BirdcSocket: "/run/flock/bird.ctl", Logger: s.Logger, } // Install kernel blackhole routes for the node summary CIDRs. These // stay regardless of BGP — they keep the kernel from sending unknown // destinations within our /64 to a default route loop. if err := bird.SummaryRoutes(nc); err != nil { s.Logger.Warn("install summary routes", "err", err) } // BGP is intentionally NOT rendered on the first cutover. // // Calico's calico-node DaemonSet still runs on this node (it's // Tigera-Operator-managed via ArgoCD with selfHeal=true) and Calico's // bird is bound to BGP port 179 with the same ASN we'd advertise from. // A clean coexistence requires either an Installation-CR change or a // post-cutover Calico stop. Both are out of scope for the first M2 // cutover. crt001 carries a static route for the flock /64 instead. // // To switch to live BGP later: replace this block with bird.Render(nc, // ...) + 15s tick re-render, after disabling calico-node on flock- // labeled nodes. The bird sidecar is already running with a bootstrap // config (just protocol kernel + device — no BGP), so flipping this on // is a one-line change here. s.Logger.Info("BIRD BGP disabled for first cutover; static route on crt001 carries flock /64", "node_cidr6", nc.Spec.CIDR6, "node_cidr4", nc.Spec.CIDR4) handler := &PodHandler{ Node: s.Node, Store: s.Store, IPAM: ipam, Pods: pods, NodeConfig: s.NodeConfig, SetupFunc: Setup, TeardownFunc: Teardown, AfterCommit: func() { // Future: collect anycast IPs from store snapshot, re-render bird. }, } s.RPC.SetHandlers(handler.Add, handler.Del, handler.Check) s.Logger.Info("runtime ready", "asn", nc.Spec.BGP.ASN, "cidr6", nc.Spec.CIDR6, "cidr4", nc.Spec.CIDR4, "committed", len(s.Store.Snapshot()), ) return nil } // routerIDFromNodeIP picks a stable IPv4 to use as BIRD router-id. Uses // the host network for now; falls back to a synthesized value derived // from the node name if no v4 is reachable. func routerIDFromNodeIP(_ interface{}) string { // Best-effort: read the kernel route table for a default-route src. addrs, err := net.InterfaceAddrs() if err == nil { for _, a := range addrs { ipn, ok := a.(*net.IPNet) if !ok { continue } v4 := ipn.IP.To4() if v4 == nil || v4.IsLoopback() || v4.IsLinkLocalUnicast() { continue } return v4.String() } } // Fallback: 127.0.0.1 — bird will accept it but BGP peers won't like a // duplicate router-id. The agent log will scream above this if it fires. return "127.0.0.1" }