M2: netlink, IPAM/handler wiring, BIRD sidecar, CNI installer
Build flock Image / build (push) Has been cancelled

Code (Linux build, with no-op stubs for macOS dev):
- pkg/agent/netns_linux.go: ensureVeth → host-side configure (addrgenmode
  none, fe80::1/64, proxy_arp, forwarding) → move peer to pod ns →
  configure pod side (addr, default route via fe80::1, v4 169.254.1.1
  on-link gateway) → host /128 + /32 routes. Idempotent.
- pkg/agent/hostiface.go: deterministic host iface name flock<8hex> from
  FNV-1a-32(containerID).
- pkg/agent/annotations.go: parse flock.fritzlab.net/{ipv6,ipv4,cidr6,
  cidr4,ip-algo,anycast} with design-doc defaults; ParseCNIArgs for the
  K8S_POD_* keys kubelet sets.
- pkg/agent/podinfo.go: shared informer scoped to spec.nodeName==NODE,
  WaitForPod helper for ADD-vs-informer-sync race.
- pkg/agent/handlers.go: PodHandler does
    cache lookup → annotations → IPAM → store(pending) → SetupFunc →
    store(committed) → Result. Idempotent on retry. Del symmetric.
- pkg/routing/bird/config.go: text/template render with stable ordering;
  golden tests for host001 + anycast injection + sort stability.
- pkg/agent/bird.go: writes /etc/flock/bird/bird.conf, debounces 500ms,
  execs `birdc -s /run/flock/bird.ctl configure`. Installs blackhole
  kernel routes for the node summary CIDRs so BIRD's protocol kernel
  imports them.
- pkg/agent/runtime_linux.go: at startup, waits up to 60s for the per-
  node NodeConfig, reconciles committed allocations into IPAM.used,
  garbage-collects pending entries, builds PodHandler, swaps RPC
  handlers in.
- cmd/flock-installer: init-container binary that copies /opt/cni/bin/
  flock and writes 01-flock.conflist (lex-first so kubelet picks it
  over Calico's 10-calico.conflist on flock-labeled nodes).

Deploy:
- Dockerfile: alpine + iproute2 + bird2; multi-binary image.
- deploy/daemonset.yaml: install-cni init container; bird sidecar
  sharing /etc/flock/bird + /run/flock with the agent; ConfigMap-seeded
  bootstrap bird.conf so the sidecar boots before the agent renders.
  Privileged on flock-agent + install-cni; bird sidecar uses
  NET_ADMIN/RAW only.
- RBAC: pods + networkpolicies get/list/watch (the latter is reserved
  for M8 — harmless to grant now).

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Donavan Fritz
2026-04-24 22:33:48 -05:00
parent 31fcae2a97
commit eb1f5e0d8d
20 changed files with 1688 additions and 61 deletions
+143
View File
@@ -0,0 +1,143 @@
package agent
import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"sync"
	"time"

	flockv1alpha1 "code.fritzlab.net/fritzlab/flock/pkg/api/v1alpha1"
	"code.fritzlab.net/fritzlab/flock/pkg/routing/bird"
)
// BirdManager renders bird.conf for one node and asks BIRD to reload it via
// birdc. The file is rewritten only when the rendered text changes, and
// reloads (not writes) are debounced so a burst of NodeConfig / anycast
// changes coalesces into one `birdc configure`.
type BirdManager struct {
// NodeName is passed through to the config renderer as the node's name.
NodeName string
// ConfigPath is the file Render writes, e.g. /etc/flock/bird/bird.conf.
// Its parent directory is created on demand.
ConfigPath string
// BirdcSocket is the control socket handed to birdc with -s. When empty,
// reload uses /run/flock/bird.ctl.
BirdcSocket string
// BirdctlPath is the birdc executable to run. When empty, reload uses
// "birdc". Overridable for tests.
BirdctlPath string
// Logger receives reload and route-install diagnostics. Callers should
// always set it.
Logger *slog.Logger
// mu guards last and debounce.
mu sync.Mutex
// last is the most recent config text written to ConfigPath; Render uses it
// to skip redundant writes and reloads.
last string
// debounce is the timer for the most recently scheduled reload, or nil if
// no reload has been scheduled yet.
debounce *time.Timer
}
// Render builds the BIRD configuration for this node from nc, the anycast
// prefix lists and routerID, writes it to b.ConfigPath and schedules a
// debounced birdc reload.
//
// BGP peers for which bird.FamilyOf returns "" are left out of the rendered
// config. If the rendered text equals the text this manager wrote last, the
// call is a no-op: nothing is written and no reload is scheduled. Otherwise
// the text is written to ConfigPath+".tmp" and then renamed over ConfigPath.
//
// It returns an error when nc is nil, when rendering fails, or when the
// directory, temp file or rename step fails.
func (b *BirdManager) Render(nc *flockv1alpha1.NodeConfig, anycast6, anycast4 []string, routerID string) error {
	if nc == nil {
		return fmt.Errorf("no NodeConfig")
	}

	input := bird.NodeBGP{
		NodeName: b.NodeName,
		RouterID: routerID,
		LocalASN: nc.Spec.BGP.ASN,
		CIDR6:    nc.Spec.CIDR6,
		CIDR4:    nc.Spec.CIDR4,
		Anycast6: anycast6,
		Anycast4: anycast4,
	}
	for _, peer := range nc.Spec.BGP.Peers {
		// Only peers with a recognised address family make it into the config.
		if family := bird.FamilyOf(peer.Address); family != "" {
			input.Peers = append(input.Peers, bird.Peer{Family: family, Address: peer.Address, ASN: peer.ASN})
		}
	}

	rendered, err := bird.Render(input)
	if err != nil {
		return err
	}

	// Everything below touches b.last and the debounce timer.
	b.mu.Lock()
	defer b.mu.Unlock()

	if rendered == b.last {
		return nil
	}

	dir := filepath.Dir(b.ConfigPath)
	if mkErr := os.MkdirAll(dir, 0o755); mkErr != nil {
		return fmt.Errorf("mkdir bird config dir: %w", mkErr)
	}

	// Stage the new content next to the target, then move it into place.
	staging := b.ConfigPath + ".tmp"
	if wrErr := os.WriteFile(staging, []byte(rendered), 0o644); wrErr != nil {
		return fmt.Errorf("write bird.conf: %w", wrErr)
	}
	if mvErr := os.Rename(staging, b.ConfigPath); mvErr != nil {
		return fmt.Errorf("rename bird.conf: %w", mvErr)
	}

	b.last = rendered
	b.scheduleReload()
	return nil
}
// scheduleReload arms a 500ms timer that runs b.reload, first stopping the
// timer left over from the previous call (if any). Calls that arrive within
// the window therefore collapse into a single reload after the last of them.
// The caller visible in this file (Render) invokes it with b.mu held.
func (b *BirdManager) scheduleReload() {
	const window = 500 * time.Millisecond

	if pending := b.debounce; pending != nil {
		pending.Stop()
	}
	b.debounce = time.AfterFunc(window, b.reload)
}
// reload asks the running BIRD daemon to re-read its configuration by
// executing `<birdctl> -s <socket> configure`.
//
// Zero-value fields fall back to defaults: BirdctlPath to "birdc",
// BirdcSocket to "/run/flock/bird.ctl" and Logger to slog.Default(). The
// command is bounded by a timeout so that a birdc which never returns cannot
// block this goroutine and its child process forever; on expiry the
// child is killed. Failures are only logged — nothing here retries, so the
// next attempt happens when Render next writes a changed config.
func (b *BirdManager) reload() {
	const timeout = 10 * time.Second

	logger := b.Logger
	if logger == nil {
		// reload runs on a timer goroutine, where a nil-pointer panic would
		// take the whole process down.
		logger = slog.Default()
	}
	birdctl := b.BirdctlPath
	if birdctl == "" {
		birdctl = "birdc"
	}
	socket := b.BirdcSocket
	if socket == "" {
		socket = "/run/flock/bird.ctl"
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	out, err := exec.CommandContext(ctx, birdctl, "-s", socket, "configure").CombinedOutput()
	switch {
	case err == nil:
		logger.Info("birdc configure ok", "out", string(out))
	case errors.Is(ctx.Err(), context.DeadlineExceeded):
		logger.Warn("birdc reload timed out", "timeout", timeout, "err", err, "out", string(out))
	case errors.Is(err, exec.ErrNotFound) || os.IsNotExist(err):
		// First-run case: the birdc binary may not be in place yet. The
		// reload is attempted again on the next config change.
		logger.Warn("birdc not available", "err", err)
	default:
		logger.Warn("birdc reload failed", "err", err, "out", string(out))
	}
}
// SummaryRoutes installs a blackhole kernel route for every CIDR in
// nc.Spec.CIDR6 and nc.Spec.CIDR4 (v6 first), so that BIRD's kernel protocol
// can import and advertise them. Routes are installed via installBlackhole,
// which uses `ip route replace`, so repeating the call is harmless.
//
// This is best-effort: a failure for one CIDR is logged as a warning and the
// remaining CIDRs are still attempted. The returned error is always nil, and a
// nil nc is a no-op.
func (b *BirdManager) SummaryRoutes(nc *flockv1alpha1.NodeConfig) error {
	if nc == nil {
		return nil
	}

	// install tries every prefix in cidrs, logging failures under msg.
	install := func(msg string, cidrs []string) {
		for _, cidr := range cidrs {
			err := installBlackhole(cidr)
			if err == nil {
				continue
			}
			b.Logger.Warn(msg, "cidr", cidr, "err", err)
		}
	}

	install("blackhole route v6", nc.Spec.CIDR6)
	install("blackhole route v4", nc.Spec.CIDR4)
	return nil
}
// installBlackhole runs `ip route replace blackhole <cidr>` after checking
// that cidr parses with net.ParseCIDR.
//
// The `ip` binary is used instead of netlink so this file stays buildable on
// non-Linux platforms (the agent on macOS just no-ops); the agent itself only
// runs in Kubernetes pods on Linux nodes, so shelling out is acceptable.
//
// It returns the parse error unchanged for a malformed cidr, or an error
// wrapping the exec failure together with the command's combined output.
func installBlackhole(cidr string) error {
	if _, _, parseErr := net.ParseCIDR(cidr); parseErr != nil {
		return parseErr
	}

	output, runErr := exec.Command("ip", "route", "replace", "blackhole", cidr).CombinedOutput()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("ip route replace blackhole %s: %w (%s)", cidr, runErr, string(output))
}