anycast: kernel multipath route + L4 hash for multi-pod-per-node
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
Move pure resolver logic out of anycast_linux.go into anycast.go so it's
unit-testable on any host. Reshape anycastTarget from a single
{hostIface, via} into a sorted list of nexthops; multiple Ready pods on
the same node binding the same anycast IP now contribute one nexthop
each.
installAnycastRoute uses RTA_MULTIPATH (via netlink.Route.MultiPath)
when the target has more than one nexthop. Single-nexthop targets keep
the simple via-route shape so 1-pod-per-node keeps rendering identically
to today's production form in `ip route show`.
flock-agent writes net.ipv{4,6}.fib_multipath_hash_policy = 1 at
startup so the kernel hashes flows on (saddr, daddr, sport, dport, proto)
rather than just IPs. The write is best-effort: the agent runs privileged
in production, so it succeeds there; in environments where the write fails
the kernel falls back to the L3 hash (which only matters for the
multi-pod-per-node case anyway).
resolveAnycastTargets sorts nexthops by canonical(via) for stable
comparison so a quiet reconcile pass doesn't churn the kernel route.
8 new unit tests cover: 1-pod, 2-pods-same-anycast (multi-nexthop),
NotReady drop, no-Ready omits the IP, pending skipped, mixed v6+v4,
family mismatch warns, determinism.
Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,227 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"net"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// allReady is an isReady predicate for tests that reports every pod as
// Ready. Both string arguments (presumably namespace and pod name) are
// ignored.
func allReady(string, string) bool {
	return true
}
|
||||
|
||||
// readyOnly builds an isReady predicate that answers true only when its
// second argument (the pod name) is one of want. The first argument is
// ignored. Calling readyOnly with no names yields a predicate that is
// always false.
func readyOnly(want ...string) func(string, string) bool {
	allowed := make(map[string]struct{}, len(want))
	for i := range want {
		allowed[want[i]] = struct{}{}
	}
	return func(_, podName string) bool {
		if _, found := allowed[podName]; found {
			return true
		}
		return false
	}
}
|
||||
|
||||
func TestResolveAnycastTargets_OnePodOneAnycast(t *testing.T) {
|
||||
allocs := []Allocation{{
|
||||
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted,
|
||||
IP6: "2001:db8::1",
|
||||
Anycast: []string{"2001:db8:a::1"},
|
||||
}}
|
||||
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||
if len(out) != 1 {
|
||||
t.Fatalf("expected 1 anycast IP, got %d", len(out))
|
||||
}
|
||||
tgt, ok := out["2001:db8:a::1"]
|
||||
if !ok {
|
||||
t.Fatalf("missing target")
|
||||
}
|
||||
if len(tgt.nexthops) != 1 {
|
||||
t.Fatalf("expected 1 nexthop, got %d", len(tgt.nexthops))
|
||||
}
|
||||
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||
t.Fatalf("nexthop via wrong: %v", tgt.nexthops[0].via)
|
||||
}
|
||||
}
|
||||
|
||||
// Two pods on the same node binding the same anycast IP must produce a
|
||||
// SINGLE target with TWO nexthops. The previous behaviour (overwriting)
|
||||
// was the bug this whole change exists to fix.
|
||||
func TestResolveAnycastTargets_TwoPodsSameAnycast_MultiNexthop(t *testing.T) {
|
||||
allocs := []Allocation{
|
||||
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted, IP6: "2001:db8::2",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
{ContainerID: "c2", Namespace: "ns", PodName: "pod-b",
|
||||
State: StateCommitted, IP6: "2001:db8::1",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
}
|
||||
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||
tgt := out["2001:db8:a::1"]
|
||||
if len(tgt.nexthops) != 2 {
|
||||
t.Fatalf("expected 2 nexthops, got %d", len(tgt.nexthops))
|
||||
}
|
||||
// Order should be sorted by canonical(via) — ::1 before ::2.
|
||||
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||
t.Fatalf("nexthops not sorted by via; got %v first", tgt.nexthops[0].via)
|
||||
}
|
||||
if !tgt.nexthops[1].via.Equal(net.ParseIP("2001:db8::2")) {
|
||||
t.Fatalf("nexthops not sorted by via; got %v second", tgt.nexthops[1].via)
|
||||
}
|
||||
// HostIface differs per pod (different containerID → different FNV).
|
||||
if tgt.nexthops[0].hostIface == tgt.nexthops[1].hostIface {
|
||||
t.Fatalf("expected distinct hostIfaces, both %q", tgt.nexthops[0].hostIface)
|
||||
}
|
||||
}
|
||||
|
||||
// When one of the contributing pods goes NotReady, only the remaining
|
||||
// Ready pod should appear in the target's nexthop set.
|
||||
func TestResolveAnycastTargets_NotReadyDropped(t *testing.T) {
|
||||
allocs := []Allocation{
|
||||
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted, IP6: "2001:db8::1",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
{ContainerID: "c2", Namespace: "ns", PodName: "pod-b",
|
||||
State: StateCommitted, IP6: "2001:db8::2",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
}
|
||||
out := resolveAnycastTargets(allocs, readyOnly("pod-a"), nil)
|
||||
tgt := out["2001:db8:a::1"]
|
||||
if len(tgt.nexthops) != 1 {
|
||||
t.Fatalf("expected 1 nexthop after NotReady drop, got %d", len(tgt.nexthops))
|
||||
}
|
||||
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||
t.Fatalf("wrong surviving nexthop: %v", tgt.nexthops[0].via)
|
||||
}
|
||||
}
|
||||
|
||||
// Pods that haven't reached Ready are excluded entirely from the target
|
||||
// set. If no pod is Ready for an anycast IP, that IP is absent from the
|
||||
// output (BIRD will withdraw from BGP, kernel route will be removed).
|
||||
func TestResolveAnycastTargets_NoReadyPodsOmitsIP(t *testing.T) {
|
||||
allocs := []Allocation{
|
||||
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted, IP6: "2001:db8::1",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
}
|
||||
out := resolveAnycastTargets(allocs, readyOnly( /* none */ ), nil)
|
||||
if _, ok := out["2001:db8:a::1"]; ok {
|
||||
t.Fatalf("anycast should be absent when no pod ready")
|
||||
}
|
||||
}
|
||||
|
||||
// Pending allocations (CNI ADD partway through) are skipped even if the
|
||||
// pod is Ready — we don't program kernel routes for partial setups.
|
||||
func TestResolveAnycastTargets_PendingSkipped(t *testing.T) {
|
||||
allocs := []Allocation{
|
||||
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StatePending, IP6: "2001:db8::1",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
}
|
||||
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||
if len(out) != 0 {
|
||||
t.Fatalf("pending allocations must be skipped")
|
||||
}
|
||||
}
|
||||
|
||||
// Mixed v6+v4 anycast on the same pod produces two separate target
|
||||
// entries, one per family, each anchored on the matching unicast IP.
|
||||
func TestResolveAnycastTargets_MixedFamilies(t *testing.T) {
|
||||
allocs := []Allocation{{
|
||||
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted,
|
||||
IP6: "2001:db8::1",
|
||||
IP4: "10.0.0.1",
|
||||
Anycast: []string{"2001:db8:a::1", "10.255.0.1"},
|
||||
}}
|
||||
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||
if !out["2001:db8:a::1"].nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||
t.Fatalf("v6 anycast should resolve via v6 unicast")
|
||||
}
|
||||
if !out["10.255.0.1"].nexthops[0].via.Equal(net.ParseIP("10.0.0.1").To4()) {
|
||||
t.Fatalf("v4 anycast should resolve via v4 unicast")
|
||||
}
|
||||
}
|
||||
|
||||
// An anycast whose family has no matching unicast on the pod is skipped
|
||||
// with a warning. Other anycast IPs on the same pod are unaffected.
|
||||
func TestResolveAnycastTargets_FamilyMismatchWarns(t *testing.T) {
|
||||
allocs := []Allocation{{
|
||||
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||
State: StateCommitted,
|
||||
IP6: "2001:db8::1", // v6 only
|
||||
Anycast: []string{"2001:db8:a::1", "10.255.0.1"},
|
||||
}}
|
||||
var warns []string
|
||||
out := resolveAnycastTargets(allocs, allReady, func(s string) { warns = append(warns, s) })
|
||||
if _, has := out["2001:db8:a::1"]; !has {
|
||||
t.Fatalf("v6 anycast should have been programmed")
|
||||
}
|
||||
if _, has := out["10.255.0.1"]; has {
|
||||
t.Fatalf("v4 anycast should have been skipped")
|
||||
}
|
||||
if len(warns) != 1 {
|
||||
t.Fatalf("expected 1 warning, got %d: %v", len(warns), warns)
|
||||
}
|
||||
if !strings.Contains(warns[0], "10.255.0.1") {
|
||||
t.Fatalf("warning should mention skipped IP: %q", warns[0])
|
||||
}
|
||||
}
|
||||
|
||||
// Determinism: the same input must produce nexthops in the same order.
|
||||
func TestResolveAnycastTargets_Determinism(t *testing.T) {
|
||||
allocs := []Allocation{
|
||||
{ContainerID: "z-late", Namespace: "ns", PodName: "z",
|
||||
State: StateCommitted, IP6: "2001:db8::5",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
{ContainerID: "a-early", Namespace: "ns", PodName: "a",
|
||||
State: StateCommitted, IP6: "2001:db8::3",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
{ContainerID: "m-mid", Namespace: "ns", PodName: "m",
|
||||
State: StateCommitted, IP6: "2001:db8::4",
|
||||
Anycast: []string{"2001:db8:a::1"}},
|
||||
}
|
||||
a := resolveAnycastTargets(allocs, allReady, nil)
|
||||
b := resolveAnycastTargets(allocs, allReady, nil)
|
||||
if !a["2001:db8:a::1"].equal(b["2001:db8:a::1"]) {
|
||||
t.Fatalf("same input produced unequal targets")
|
||||
}
|
||||
// Sorted by canonical(via): ::3, ::4, ::5
|
||||
via := a["2001:db8:a::1"].nexthops
|
||||
if !via[0].via.Equal(net.ParseIP("2001:db8::3")) ||
|
||||
!via[1].via.Equal(net.ParseIP("2001:db8::4")) ||
|
||||
!via[2].via.Equal(net.ParseIP("2001:db8::5")) {
|
||||
t.Fatalf("nexthops not stably sorted: %v %v %v", via[0].via, via[1].via, via[2].via)
|
||||
}
|
||||
}
|
||||
|
||||
// equal()'s contract — different orderings are still considered equal
|
||||
// AS LONG AS both sides have been canonicalised by resolveAnycastTargets.
|
||||
// Across-call comparisons of resolver outputs must always match for the
|
||||
// same logical input.
|
||||
func TestAnycastTarget_Equal(t *testing.T) {
|
||||
a := anycastTarget{nexthops: []anycastNexthop{
|
||||
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||
{hostIface: "f2", via: net.ParseIP("2001:db8::2")},
|
||||
}}
|
||||
b := anycastTarget{nexthops: []anycastNexthop{
|
||||
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||
{hostIface: "f2", via: net.ParseIP("2001:db8::2")},
|
||||
}}
|
||||
if !a.equal(b) {
|
||||
t.Fatalf("equal targets reported unequal")
|
||||
}
|
||||
c := anycastTarget{nexthops: []anycastNexthop{
|
||||
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||
}}
|
||||
if a.equal(c) {
|
||||
t.Fatalf("targets with different lengths reported equal")
|
||||
}
|
||||
d := anycastTarget{nexthops: []anycastNexthop{
|
||||
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||
{hostIface: "f2", via: net.ParseIP("2001:db8::3")}, // diff IP
|
||||
}}
|
||||
if a.equal(d) {
|
||||
t.Fatalf("targets with different vias reported equal")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user