anycast: drop pods from nexthop set on DeletionTimestamp
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
Previously the AnycastReconciler kept a pod in the nexthop set as long as its PodReady condition was True. During a rolling restart that produces a window after the apiserver has set DeletionTimestamp and kubelet has begun terminating the pod (the pod still reports Ready until probes observe the shutdown) where BGP still advertises a path through the dying pod's veth — in-flight requests get RST'd when the container actually exits. Fix: introduce podAnycastEligible(pod) = !DeletionTimestamp && Ready, swap it in at the AnycastReconciler's isReady callback, and fire the ready-change callback when DeletionTimestamp transitions (the informer UpdateFunc previously only fired on Ready transitions). Result: as soon as the apiserver marks a pod for deletion, the reconciler withdraws the local nexthop and BIRD reannounces the route without it. Sibling replicas absorb traffic before the pod's terminationGracePeriod elapses. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -130,7 +130,7 @@ func (r *AnycastReconciler) computeDesired() map[string]anycastTarget {
|
|||||||
r.Store.Snapshot(),
|
r.Store.Snapshot(),
|
||||||
func(ns, name string) bool {
|
func(ns, name string) bool {
|
||||||
pod, ok := r.Pods.Get(ns, name)
|
pod, ok := r.Pods.Get(ns, name)
|
||||||
return ok && podReady(pod)
|
return ok && podAnycastEligible(pod)
|
||||||
},
|
},
|
||||||
func(s string) { r.Logger.Warn(s) },
|
func(s string) { r.Logger.Warn(s) },
|
||||||
)
|
)
|
||||||
|
|||||||
+15
-2
@@ -28,6 +28,16 @@ func podReady(pod *corev1.Pod) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// podAnycastEligible reports whether a pod should contribute its IP as a
|
||||||
|
// nexthop for its anycast IPs. A pod is eligible when it is Ready AND not
|
||||||
|
// being deleted. Once the apiserver sets DeletionTimestamp, kubelet has
|
||||||
|
// started teardown — kube-proxy will keep routing for terminationGracePeriod
|
||||||
|
// but the pod is on the way out; we should withdraw the nexthop immediately
|
||||||
|
// so BGP shifts traffic to a sibling before the pod actually exits.
|
||||||
|
func podAnycastEligible(pod *corev1.Pod) bool {
|
||||||
|
return pod.DeletionTimestamp == nil && podReady(pod)
|
||||||
|
}
|
||||||
|
|
||||||
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
|
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
|
||||||
// informer. ADD/DEL handlers consult it to read annotations + labels for
|
// informer. ADD/DEL handlers consult it to read annotations + labels for
|
||||||
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
|
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
|
||||||
@@ -58,7 +68,7 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
|||||||
|
|
||||||
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: func(obj interface{}) {
|
AddFunc: func(obj interface{}) {
|
||||||
if pod, ok := obj.(*corev1.Pod); ok && podReady(pod) {
|
if pod, ok := obj.(*corev1.Pod); ok && podAnycastEligible(pod) {
|
||||||
pc.fireReady()
|
pc.fireReady()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -68,7 +78,10 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
|||||||
if oldP == nil || newP == nil {
|
if oldP == nil || newP == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if podReady(oldP) != podReady(newP) {
|
// Fire on Ready transition OR DeletionTimestamp transition.
|
||||||
|
// The latter catches "pod was Ready, now being deleted" so the
|
||||||
|
// reconciler withdraws the nexthop before the pod actually exits.
|
||||||
|
if podAnycastEligible(oldP) != podAnycastEligible(newP) {
|
||||||
pc.fireReady()
|
pc.fireReady()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func readyPod(deletionTimestamp *metav1.Time) *corev1.Pod {
|
||||||
|
return &corev1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: deletionTimestamp},
|
||||||
|
Status: corev1.PodStatus{
|
||||||
|
Conditions: []corev1.PodCondition{
|
||||||
|
{Type: corev1.PodReady, Status: corev1.ConditionTrue},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPodAnycastEligible(t *testing.T) {
|
||||||
|
now := metav1.Now()
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
pod *corev1.Pod
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"ready, not deleting", readyPod(nil), true},
|
||||||
|
{"ready, but deleting", readyPod(&now), false},
|
||||||
|
{
|
||||||
|
"not ready, not deleting",
|
||||||
|
&corev1.Pod{Status: corev1.PodStatus{Conditions: []corev1.PodCondition{
|
||||||
|
{Type: corev1.PodReady, Status: corev1.ConditionFalse},
|
||||||
|
}}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{"no conditions, not deleting", &corev1.Pod{}, false},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
if got := podAnycastEligible(c.pod); got != c.want {
|
||||||
|
t.Fatalf("got %v want %v", got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user