package agent

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

// nodeStatusFieldManager identifies flock-agent in apiserver field-manager
// bookkeeping. Server-Side Apply only takes ownership of the fields we send,
// so other managers (kubelet, kcm) keep their conditions untouched between
// our writes.
const nodeStatusFieldManager = "flock-agent"

const (
	// networkConditionInterval is how often the NetworkUnavailable=False
	// condition is re-applied.
	networkConditionInterval = 60 * time.Second

	// networkConditionApplyTimeout bounds a single apply so that a hung
	// apiserver connection cannot stall the heartbeat loop. It is kept
	// below networkConditionInterval so a timed-out attempt finishes before
	// the next tick.
	networkConditionApplyTimeout = 30 * time.Second
)

// networkAvailablePatch returns the Server-Side Apply body that declares
// NetworkUnavailable=False on the named node, stamped with now (in UTC,
// RFC 3339).
//
// The body is encoded with encoding/json rather than fmt's %q verb: %q emits
// Go string-literal syntax, whose escapes (\x00, \a, \U0001f600, ...) are not
// valid JSON, so an unusual node name would yield an unparsable request.
func networkAvailablePatch(node string, now time.Time) ([]byte, error) {
	stamp := now.UTC().Format(time.RFC3339)

	// NOTE(review): lastTransitionTime is refreshed on every apply, not only
	// when the status actually changes. Preserving it would need a read of
	// the current condition first — confirm the heartbeat-style refresh is
	// intended.
	condition := map[string]any{
		"type":               "NetworkUnavailable",
		"status":             "False",
		"reason":             "FlockReady",
		"message":            "flock-agent owns CNI on this node",
		"lastHeartbeatTime":  stamp,
		"lastTransitionTime": stamp,
	}

	// Only the fields we own are declared, so SSA leaves everything else on
	// the node alone.
	body, err := json.Marshal(map[string]any{
		"apiVersion": "v1",
		"kind":       "Node",
		"metadata":   map[string]any{"name": node},
		"status":     map[string]any{"conditions": []any{condition}},
	})
	if err != nil {
		return nil, fmt.Errorf("encoding node status patch for %q: %w", node, err)
	}
	return body, nil
}

// keepNetworkAvailable maintains a NetworkUnavailable=False condition on
// the node's status. Calico-node sets this False while it owns CNI; on
// shutdown it sets it to True with reason CalicoIsDown, which adds the
// node.kubernetes.io/network-unavailable taint and blocks new scheduling.
// Once flock-agent is in charge, we own that single condition.
//
// Uses Server-Side Apply against the status subresource. NodeStatus.Conditions
// is a listType=map keyed by `type`, so SSA merges by type — our partial body
// declares ownership of just the NetworkUnavailable entry and leaves the
// kubelet-managed conditions (Ready, MemoryPressure, DiskPressure, PIDPressure)
// alone. A prior implementation used JSON merge-patch with a one-element
// conditions array, which the apiserver REPLACES (merge-patch on arrays is
// whole-array semantics) — that race-stripped the kubelet conditions every
// 60s and produced ~5s flickers in `kubectl get nodes`.
//
// Re-applies every minute (heartbeat-style) so a stale condition from a
// previous CNI is overwritten without an explicit transition.
//
// The call blocks until ctx is cancelled, so callers are expected to run it
// in its own goroutine. If the Kubernetes client cannot be built from cfg it
// logs a warning and returns immediately. Apply failures are logged and
// retried on the next tick; they never terminate the loop.
func keepNetworkAvailable(ctx context.Context, cfg *rest.Config, node string, logger *slog.Logger) {
	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		logger.Warn("network-condition: kubernetes client", "err", err)
		return
	}

	// Force=true lets us reclaim the condition if a previous CNI's
	// finalizer/cleanup left it owned by a different manager.
	force := true
	opts := metav1.PatchOptions{FieldManager: nodeStatusFieldManager, Force: &force}

	apply := func() {
		body, err := networkAvailablePatch(node, time.Now())
		if err != nil {
			logger.Warn("network-condition: ssa body", "err", err)
			return
		}

		// Bound each attempt independently of the long-lived ctx.
		callCtx, cancel := context.WithTimeout(ctx, networkConditionApplyTimeout)
		defer cancel()

		if _, err := cs.CoreV1().Nodes().Patch(callCtx, node, types.ApplyPatchType, body, opts, "status"); err != nil {
			logger.Warn("network-condition: ssa apply failed", "err", err)
		}
	}

	// Apply once right away so the condition is fixed at startup, then keep
	// refreshing it on a fixed interval.
	apply()

	t := time.NewTicker(networkConditionInterval)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			apply()
		}
	}
}