anycast: drop pods from nexthop set on DeletionTimestamp
Build flock Image / build (push) Has been cancelled

Previously the AnycastReconciler kept a pod in the nexthop set as long as
its PodReady condition was True. During a rolling restart, that leaves a
window after the pod has been marked for deletion (DeletionTimestamp set,
pod still Ready until probes observe shutdown) in which BGP still
advertises a path through the dying pod's veth — in-flight requests get
RST'd when the container actually exits.

Fix: introduce podAnycastEligible(pod) = !DeletionTimestamp && Ready,
swap it in at the AnycastReconciler's isReady callback, and fire the
ready-change callback when DeletionTimestamp transitions (the informer
UpdateFunc previously only fired on Ready transitions).

Result: as soon as the apiserver marks a pod for deletion, the
reconciler withdraws the local nexthop and BIRD reannounces the route
without it. Sibling replicas absorb traffic before the pod's
terminationGracePeriod elapses.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Donavan Fritz
2026-04-25 22:24:50 -05:00
parent e9d3eef2cc
commit c61b12204c
3 changed files with 62 additions and 3 deletions
+1 -1
View File
@@ -130,7 +130,7 @@ func (r *AnycastReconciler) computeDesired() map[string]anycastTarget {
r.Store.Snapshot(), r.Store.Snapshot(),
func(ns, name string) bool { func(ns, name string) bool {
pod, ok := r.Pods.Get(ns, name) pod, ok := r.Pods.Get(ns, name)
return ok && podReady(pod) return ok && podAnycastEligible(pod)
}, },
func(s string) { r.Logger.Warn(s) }, func(s string) { r.Logger.Warn(s) },
) )
+15 -2
View File
@@ -28,6 +28,16 @@ func podReady(pod *corev1.Pod) bool {
return false return false
} }
// podAnycastEligible reports whether pod may serve as a nexthop for its
// anycast IPs. Two conditions must both hold: the pod has no
// DeletionTimestamp, and podReady(pod) is true.
//
// Deletion is checked on top of readiness so that a pod which is still
// Ready but already marked for deletion is withdrawn right away, letting
// BGP move traffic to a sibling before the pod actually exits.
//
// pod must be non-nil; it is dereferenced unconditionally.
func podAnycastEligible(pod *corev1.Pod) bool {
	if pod.DeletionTimestamp != nil {
		// Marked for deletion: never eligible, whatever the Ready state.
		return false
	}
	return podReady(pod)
}
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod // PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
// informer. ADD/DEL handlers consult it to read annotations + labels for // informer. ADD/DEL handlers consult it to read annotations + labels for
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready // IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
@@ -58,7 +68,7 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{ _, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) { AddFunc: func(obj interface{}) {
if pod, ok := obj.(*corev1.Pod); ok && podReady(pod) { if pod, ok := obj.(*corev1.Pod); ok && podAnycastEligible(pod) {
pc.fireReady() pc.fireReady()
} }
}, },
@@ -68,7 +78,10 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
if oldP == nil || newP == nil { if oldP == nil || newP == nil {
return return
} }
if podReady(oldP) != podReady(newP) { // Fire on Ready transition OR DeletionTimestamp transition.
// The latter catches "pod was Ready, now being deleted" so the
// reconciler withdraws the nexthop before the pod actually exits.
if podAnycastEligible(oldP) != podAnycastEligible(newP) {
pc.fireReady() pc.fireReady()
} }
}, },
+46
View File
@@ -0,0 +1,46 @@
package agent
import (
"testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// readyPod returns a Pod fixture whose PodReady condition is True and whose
// DeletionTimestamp is set to deletionTimestamp (nil leaves it unset).
func readyPod(deletionTimestamp *metav1.Time) *corev1.Pod {
	pod := &corev1.Pod{}
	pod.ObjectMeta.DeletionTimestamp = deletionTimestamp
	pod.Status.Conditions = []corev1.PodCondition{
		{
			Type:   corev1.PodReady,
			Status: corev1.ConditionTrue,
		},
	}
	return pod
}
// TestPodAnycastEligible exercises podAnycastEligible over the combinations
// of Ready state (True, False, no conditions) and DeletionTimestamp (unset,
// set). Only a Ready pod with no DeletionTimestamp is expected to be
// eligible; every case with a DeletionTimestamp must be ineligible.
func TestPodAnycastEligible(t *testing.T) {
	now := metav1.Now()

	// notReadyPod mirrors readyPod but reports PodReady=False.
	notReadyPod := func(deletionTimestamp *metav1.Time) *corev1.Pod {
		return &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: deletionTimestamp},
			Status: corev1.PodStatus{
				Conditions: []corev1.PodCondition{
					{Type: corev1.PodReady, Status: corev1.ConditionFalse},
				},
			},
		}
	}

	// bareDeletingPod has no conditions at all and is marked for deletion.
	bareDeletingPod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now},
	}

	cases := []struct {
		name string
		pod  *corev1.Pod
		want bool
	}{
		{name: "ready, not deleting", pod: readyPod(nil), want: true},
		{name: "ready, but deleting", pod: readyPod(&now), want: false},
		{name: "not ready, not deleting", pod: notReadyPod(nil), want: false},
		{name: "not ready, deleting", pod: notReadyPod(&now), want: false},
		{name: "no conditions, not deleting", pod: &corev1.Pod{}, want: false},
		{name: "no conditions, deleting", pod: bareDeletingPod, want: false},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			if got := podAnycastEligible(c.pod); got != c.want {
				t.Fatalf("got %v want %v", got, c.want)
			}
		})
	}
}