//go:build linux package agent import ( "context" "fmt" "net" "time" "code.fritzlab.net/fritzlab/flock/pkg/agent/netpol" ) // configureRuntime wires Pod informer, IPAM, netlink, and BIRD on a real // Linux node. Steps: // // 1. Wait for NodeConfig (operator-applied per-node CR). // 2. Reconcile any pre-existing kernel state from allocations.json into // IPAM.used (so we never re-allocate an in-flight pod's IP). // 3. Garbage-collect any state==pending entries (partial ADDs from a // previous agent generation). // 4. Start the Pod informer (filtered to spec.nodeName == node). // 5. Build PodHandler and SetHandlers(add, del, check). // 6. Install BIRD blackhole summary routes + render initial config. func (s *Server) configureRuntime(ctx context.Context) error { if err := s.firstAvailableNodeConfig(ctx, 60*time.Second); err != nil { return err } nc := s.NodeConfig.Load() ipam, err := NewIPAM(nc.Spec.CIDR6, nc.Spec.CIDR4) if err != nil { return fmt.Errorf("init ipam: %w", err) } // Reconcile committed entries; GC pending entries. for _, a := range s.Store.Snapshot() { switch a.State { case StateCommitted: if a.IP6 != "" { ipam.MarkInUse(net.ParseIP(a.IP6)) } if a.IP4 != "" { ipam.MarkInUse(net.ParseIP(a.IP4)) } case StatePending: s.Logger.Info("GC pending allocation", "container_id", a.ContainerID) _ = Teardown(a.ContainerID, net.ParseIP(a.IP6), net.ParseIP(a.IP4)) _ = s.Store.Delete(a.ContainerID) } } pods, err := StartPodInformer(ctx, s.restCfg, s.Node, s.Logger) if err != nil { return fmt.Errorf("pod informer: %w", err) } // Keep NetworkUnavailable=False so the node.kubernetes.io/network- // unavailable taint never gets re-applied. Calico's calico-node sets // it on shutdown; without an owner replacing it, kubelet's controller // taints the node and blocks scheduling. go keepNetworkAvailable(ctx, s.restCfg, s.Node, s.Logger) bird := &BirdManager{ NodeName: s.Node, ConfigPath: "/etc/flock/bird/bird.conf", BirdcSocket: "/run/flock/bird.ctl", Logger: s.Logger, } // Install kernel blackhole routes for the node summary CIDRs. These // stay regardless of BGP — they keep the kernel from sending unknown // destinations within our /64 to a default route loop. if err := bird.SummaryRoutes(nc); err != nil { s.Logger.Warn("install summary routes", "err", err) } // Calico is fenced off this node (Tigera Installation CR adds a // nodeAffinity excluding flock.fritzlab.net/agent on // calicoNodeDaemonSet). flock now owns BGP from this host. routerID := routerIDFromNodeIP(s.restCfg) if err := bird.Render(nc, nil, nil, routerID); err != nil { s.Logger.Warn("initial bird render", "err", err) } // AnycastReconciler is the single owner of bird re-renders going // forward. It runs every 2s + on Pod readiness changes + on each // successful CNI ADD/DEL. anycast := NewAnycastReconciler(s.Node, s.Store, pods, s.NodeConfig, bird, routerID, s.Logger) pods.OnReadyChange(anycast.Trigger) go anycast.Run(ctx) // Background tick for SummaryRoutes (idempotent) in case the kernel // blackhole disappears for any reason. go func() { t := time.NewTicker(60 * time.Second) defer t.Stop() for { select { case <-ctx.Done(): return case <-t.C: if cur := s.NodeConfig.Load(); cur != nil { _ = bird.SummaryRoutes(cur) } } } }() // NetworkPolicy enforcement. world := netpol.NewWorld(s.Logger) if err := world.Start(ctx, s.restCfg); err != nil { return fmt.Errorf("netpol informers: %w", err) } npApplier := &netpol.Applier{} npReconciler := netpol.NewReconciler(world, func() []netpol.Pod { return collectLocalPods(s.Store, pods) }, npApplier, s.Logger) go npReconciler.Run(ctx) handler := &PodHandler{ Node: s.Node, Store: s.Store, IPAM: ipam, Pods: pods, NodeConfig: s.NodeConfig, SetupFunc: Setup, TeardownFunc: Teardown, AfterCommit: func() { anycast.Trigger() // Re-evaluate policy on every CNI ADD/DEL so a brand-new // pod's chain lands before its first packet egresses. npReconciler.Trigger() }, } s.RPC.SetHandlers(handler.Add, handler.Del, handler.Check) s.Logger.Info("runtime ready", "asn", nc.Spec.BGP.ASN, "cidr6", nc.Spec.CIDR6, "cidr4", nc.Spec.CIDR4, "committed", len(s.Store.Snapshot()), ) return nil } // routerIDFromNodeIP picks a stable IPv4 to use as BIRD router-id. Uses // the host network for now; falls back to a synthesized value derived // from the node name if no v4 is reachable. func routerIDFromNodeIP(_ interface{}) string { // Best-effort: read the kernel route table for a default-route src. addrs, err := net.InterfaceAddrs() if err == nil { for _, a := range addrs { ipn, ok := a.(*net.IPNet) if !ok { continue } v4 := ipn.IP.To4() if v4 == nil || v4.IsLoopback() || v4.IsLinkLocalUnicast() { continue } return v4.String() } } // Fallback: 127.0.0.1 — bird will accept it but BGP peers won't like a // duplicate router-id. The agent log will scream above this if it fires. return "127.0.0.1" }