anycast: drop pods from nexthop set on DeletionTimestamp
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
Previously the AnycastReconciler kept a pod in the nexthop set for as long as its PodReady condition was True. During a rolling restart this leaves a window after the apiserver has set DeletionTimestamp and kubelet has sent SIGTERM to the container, but before readiness probes observe the shutdown, in which the pod is still Ready and BGP still advertises a path through the dying pod's veth — in-flight requests get RST'd when the container actually exits.

Fix: introduce podAnycastEligible(pod) = (DeletionTimestamp == nil) && Ready, use it as the AnycastReconciler's isReady callback, and fire the ready-change callback whenever eligibility changes, which now includes DeletionTimestamp being set on a Ready pod (the informer UpdateFunc previously fired only on Ready transitions).

Result: as soon as the apiserver marks a pod for deletion, the reconciler withdraws the local nexthop and BIRD reannounces the route without it. Sibling replicas absorb traffic before the pod's terminationGracePeriod elapses.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -130,7 +130,7 @@ func (r *AnycastReconciler) computeDesired() map[string]anycastTarget {
|
||||
r.Store.Snapshot(),
|
||||
func(ns, name string) bool {
|
||||
pod, ok := r.Pods.Get(ns, name)
|
||||
return ok && podReady(pod)
|
||||
return ok && podAnycastEligible(pod)
|
||||
},
|
||||
func(s string) { r.Logger.Warn(s) },
|
||||
)
|
||||
|
||||
+15
-2
@@ -28,6 +28,16 @@ func podReady(pod *corev1.Pod) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// podAnycastEligible reports whether a pod should contribute its IP as a
|
||||
// nexthop for its anycast IPs. A pod is eligible when it is Ready AND not
|
||||
// being deleted. Once the apiserver sets DeletionTimestamp, kubelet has
|
||||
// started teardown — kube-proxy will keep routing for terminationGracePeriod
|
||||
// but the pod is on the way out; we should withdraw the nexthop immediately
|
||||
// so BGP shifts traffic to a sibling before the pod actually exits.
|
||||
func podAnycastEligible(pod *corev1.Pod) bool {
|
||||
return pod.DeletionTimestamp == nil && podReady(pod)
|
||||
}
|
||||
|
||||
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
|
||||
// informer. ADD/DEL handlers consult it to read annotations + labels for
|
||||
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
|
||||
@@ -58,7 +68,7 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
||||
|
||||
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: func(obj interface{}) {
|
||||
if pod, ok := obj.(*corev1.Pod); ok && podReady(pod) {
|
||||
if pod, ok := obj.(*corev1.Pod); ok && podAnycastEligible(pod) {
|
||||
pc.fireReady()
|
||||
}
|
||||
},
|
||||
@@ -68,7 +78,10 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
||||
if oldP == nil || newP == nil {
|
||||
return
|
||||
}
|
||||
if podReady(oldP) != podReady(newP) {
|
||||
// Fire on Ready transition OR DeletionTimestamp transition.
|
||||
// The latter catches "pod was Ready, now being deleted" so the
|
||||
// reconciler withdraws the nexthop before the pod actually exits.
|
||||
if podAnycastEligible(oldP) != podAnycastEligible(newP) {
|
||||
pc.fireReady()
|
||||
}
|
||||
},
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func readyPod(deletionTimestamp *metav1.Time) *corev1.Pod {
|
||||
return &corev1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: deletionTimestamp},
|
||||
Status: corev1.PodStatus{
|
||||
Conditions: []corev1.PodCondition{
|
||||
{Type: corev1.PodReady, Status: corev1.ConditionTrue},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodAnycastEligible(t *testing.T) {
|
||||
now := metav1.Now()
|
||||
cases := []struct {
|
||||
name string
|
||||
pod *corev1.Pod
|
||||
want bool
|
||||
}{
|
||||
{"ready, not deleting", readyPod(nil), true},
|
||||
{"ready, but deleting", readyPod(&now), false},
|
||||
{
|
||||
"not ready, not deleting",
|
||||
&corev1.Pod{Status: corev1.PodStatus{Conditions: []corev1.PodCondition{
|
||||
{Type: corev1.PodReady, Status: corev1.ConditionFalse},
|
||||
}}},
|
||||
false,
|
||||
},
|
||||
{"no conditions, not deleting", &corev1.Pod{}, false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
if got := podAnycastEligible(c.pod); got != c.want {
|
||||
t.Fatalf("got %v want %v", got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user