Files
flock/pkg/agent/podinfo.go
T
Donavan Fritz eb1f5e0d8d
Build flock Image / build (push) Has been cancelled
M2: netlink, IPAM/handler wiring, BIRD sidecar, CNI installer
Code (Linux build, with no-op stubs for macOS dev):
- pkg/agent/netns_linux.go: ensureVeth → host-side configure (addrgenmode
  none, fe80::1/64, proxy_arp, forwarding) → move peer to pod ns →
  configure pod side (addr, default route via fe80::1, v4 169.254.1.1
  on-link gateway) → host /128 + /32 routes. Idempotent.
- pkg/agent/hostiface.go: deterministic host iface name flock<8hex> from
  FNV-1a-32(containerID).
- pkg/agent/annotations.go: parse flock.fritzlab.net/{ipv6,ipv4,cidr6,
  cidr4,ip-algo,anycast} with design-doc defaults; ParseCNIArgs for the
  K8S_POD_* keys kubelet sets.
- pkg/agent/podinfo.go: shared informer scoped to spec.nodeName==NODE,
  WaitForPod helper for ADD-vs-informer-sync race.
- pkg/agent/handlers.go: PodHandler does
    cache lookup → annotations → IPAM → store(pending) → SetupFunc →
    store(committed) → Result. Idempotent on retry. Del symmetric.
- pkg/routing/bird/config.go: text/template render with stable ordering;
  golden tests for host001 + anycast injection + sort stability.
- pkg/agent/bird.go: writes /etc/flock/bird/bird.conf, debounces 500ms,
  execs `birdc -s /run/flock/bird.ctl configure`. Installs blackhole
  kernel routes for the node summary CIDRs so BIRD's protocol kernel
  imports them.
- pkg/agent/runtime_linux.go: at startup, waits up to 60s for the per-
  node NodeConfig, reconciles committed allocations into IPAM.used,
  garbage-collects pending entries, builds PodHandler, swaps RPC
  handlers in.
- cmd/flock-installer: init-container binary that copies /opt/cni/bin/
  flock and writes 01-flock.conflist (lex-first so kubelet picks it
  over Calico's 10-calico.conflist on flock-labeled nodes).

Deploy:
- Dockerfile: alpine + iproute2 + bird2; multi-binary image.
- deploy/daemonset.yaml: install-cni init container; bird sidecar
  sharing /etc/flock/bird + /run/flock with the agent; ConfigMap-seeded
  bootstrap bird.conf so the sidecar boots before the agent renders.
  Privileged on flock-agent + install-cni; bird sidecar uses
  NET_ADMIN/RAW only.
- RBAC: pods + networkpolicies get/list/watch (the latter is reserved
  for M8 — harmless to grant now).

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 22:33:48 -05:00

84 lines
2.6 KiB
Go

package agent
import (
"context"
"fmt"
"log/slog"
"time"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
)
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
// informer. ADD/DEL handlers consult it to read annotations + labels for
// IPAM and (later) NetworkPolicy.
//
// Instances are built by StartPodInformer, which populates store and logger.
type PodCache struct {
	// lister is not set or read by any code visible in this file.
	// NOTE(review): possibly reserved for future use — confirm before removing.
	lister cache.GenericLister
	// logger is the structured logger handed to StartPodInformer.
	logger *slog.Logger
	// store is the informer's backing store; Get looks pods up in it using
	// "<namespace>/<name>" keys.
	store cache.Store
}
// StartPodInformer launches a Pod informer filtered to spec.nodeName ==
// node, blocks until its initial list/watch has synced, and then returns a
// PodCache backed by the informer's store.
//
// The informer is started with ctx.Done() as its stop channel, so it runs
// until ctx is cancelled.
//
// node must be non-empty: an empty value would yield the field selector
// "spec.nodeName=", which selects pods that have no node assigned instead of
// the pods scheduled onto this node.
//
// An error is returned when node is empty, when the Kubernetes client cannot
// be built from cfg, or when ctx ends before the cache has synced. In the
// last case the context's error is wrapped, so callers can test for
// context.Canceled / context.DeadlineExceeded with errors.Is.
func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger *slog.Logger) (*PodCache, error) {
	if node == "" {
		return nil, fmt.Errorf("pod informer: node name must not be empty")
	}

	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return nil, fmt.Errorf("kubernetes client: %w", err)
	}

	// Build the selector once so the value that is logged is exactly the
	// value that is sent to the API server.
	selector := fields.OneTermEqualSelector("spec.nodeName", node).String()
	tweak := func(opts *metav1.ListOptions) {
		opts.FieldSelector = selector
	}

	// 10 minutes is the factory's default resync period.
	factory := informers.NewSharedInformerFactoryWithOptions(cs, 10*time.Minute,
		informers.WithTweakListOptions(tweak))
	// The informer must be requested before Start: the factory only runs
	// informers that have been asked for.
	inf := factory.Core().V1().Pods().Informer()

	logger.Info("Pod informer starting", "node", node, "field_selector", selector)
	factory.Start(ctx.Done())

	if !cache.WaitForCacheSync(ctx.Done(), inf.HasSynced) {
		// WaitForCacheSync gives up when the stop channel closes, so surface
		// the context's reason instead of a bare message.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("pod informer cache failed to sync: %w", ctxErr)
		}
		return nil, fmt.Errorf("pod informer cache failed to sync")
	}

	logger.Info("Pod informer synced", "node", node, "items", len(inf.GetStore().ListKeys()))
	return &PodCache{store: inf.GetStore(), logger: logger}, nil
}
// Get returns the cached Pod stored under "<namespace>/<name>".
//
// The boolean result is false, and the Pod nil, when the store lookup
// fails, when no entry exists for the key, when the entry is a nil
// interface value, or when the entry is not a *corev1.Pod.
func (c *PodCache) Get(namespace, name string) (*corev1.Pod, bool) {
	item, found, lookupErr := c.store.GetByKey(namespace + "/" + name)
	switch {
	case lookupErr != nil, !found, item == nil:
		return nil, false
	}

	if p, isPod := item.(*corev1.Pod); isPod {
		return p, true
	}
	return nil, false
}
// WaitForPod polls the cache until the pod namespace/name appears, the
// timeout elapses, or ctx is done, whichever happens first.
//
// kubelet may invoke CNI ADD slightly before the informer has observed the
// PodSpec, so this helper smooths the race.
//
// The cache is checked immediately and then every 50ms. The final wait is
// shortened to the time left before the deadline, so the call does not run
// past timeout by a full poll interval; one last lookup is still made once
// the deadline is reached. A single timer is reused across iterations
// rather than allocating a new one per poll.
//
// It returns the pod on success, ctx.Err() if ctx ends first, or a
// "not found in informer cache" error once timeout has elapsed.
func (c *PodCache) WaitForPod(ctx context.Context, namespace, name string, timeout time.Duration) (*corev1.Pod, error) {
	const pollInterval = 50 * time.Millisecond

	deadline := time.Now().Add(timeout)
	var timer *time.Timer
	for {
		if pod, ok := c.Get(namespace, name); ok {
			return pod, nil
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			return nil, fmt.Errorf("pod %s/%s not found in informer cache after %s", namespace, name, timeout)
		}

		// Never sleep past the deadline.
		wait := pollInterval
		if remaining < wait {
			wait = remaining
		}

		if timer == nil {
			// Executed at most once: the timer is created lazily so a cache
			// hit on the first lookup allocates nothing.
			timer = time.NewTimer(wait)
			defer timer.Stop()
		} else {
			// Safe to Reset: the previous tick was received below, so the
			// channel is already drained.
			timer.Reset(wait)
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-timer.C:
		}
	}
}