From 8d6e50c98049c26f336086e3ab53f1fd3c4ba423 Mon Sep 17 00:00:00 2001 From: Donavan Fritz Date: Fri, 8 May 2026 09:35:27 -0500 Subject: [PATCH] deploy: catch-all toleration so DS schedules on not-ready nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the explicit toleration list with `operator: Exists`. The previous list lacked node.kubernetes.io/not-ready:NoSchedule, so during a fresh control-plane join the CNI agent couldn't schedule until the node became Ready — but the node can't become Ready without the CNI. The catch-all also covers the fritzlab.net/cni-test toleration that deploy/install.yaml previously listed explicitly. Surfaced during host001/host002 PERC migration rebuild. Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy/daemonset.yaml | 13 ++++--------- deploy/install.yaml | 17 ++++------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/deploy/daemonset.yaml b/deploy/daemonset.yaml index c130c3e..dbd104d 100644 --- a/deploy/daemonset.yaml +++ b/deploy/daemonset.yaml @@ -41,15 +41,10 @@ spec: nodeSelector: flock.fritzlab.net/agent: "" tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node.kubernetes.io/not-ready - operator: Exists - effect: NoExecute - - key: node.kubernetes.io/unreachable - operator: Exists - effect: NoExecute + # CNI must schedule on a fresh node before it becomes Ready — + # the node has not-ready:NoSchedule until flock installs the CNI conflist. + # A key-less `operator: Exists` tolerates every taint (any key, any effect), so taints never keep the agent off a selected node. 
+ - operator: Exists initContainers: - name: install-cni image: code.fritzlab.net/fritzlab/flock:latest diff --git a/deploy/install.yaml b/deploy/install.yaml index 4eb0252..40175b4 100644 --- a/deploy/install.yaml +++ b/deploy/install.yaml @@ -182,19 +182,10 @@ spec: nodeSelector: flock.fritzlab.net/agent: "" tolerations: - - key: fritzlab.net/cni-test - operator: Equal - value: "true" - effect: NoSchedule - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node.kubernetes.io/not-ready - operator: Exists - effect: NoExecute - - key: node.kubernetes.io/unreachable - operator: Exists - effect: NoExecute + # CNI must schedule on a fresh node before it becomes Ready — + # the node has not-ready:NoSchedule until flock installs the CNI conflist. + # A key-less `operator: Exists` tolerates every taint (any key, any effect), so taints never keep the agent off a selected node. + - operator: Exists initContainers: - name: install-cni image: code.fritzlab.net/fritzlab/flock:latest