diff --git a/pkg/agent/annotations.go b/pkg/agent/annotations.go index 56a3765..062c1f1 100644 --- a/pkg/agent/annotations.go +++ b/pkg/agent/annotations.go @@ -299,8 +299,8 @@ func tryParseIPAlgo(s string) []embed.Field { switch part { case string(embed.FieldNamespace): f = embed.FieldNamespace - case string(embed.FieldPod): - f = embed.FieldPod + case string(embed.FieldApp): + f = embed.FieldApp case string(embed.FieldImage): f = embed.FieldImage default: diff --git a/pkg/agent/annotations_test.go b/pkg/agent/annotations_test.go index 40f30ed..edf87d6 100644 --- a/pkg/agent/annotations_test.go +++ b/pkg/agent/annotations_test.go @@ -179,10 +179,10 @@ func TestParseAnnotations_BoolCaseInsensitive(t *testing.T) { // "all three mean unset". func TestResolveIPAlgo_PodWins(t *testing.T) { - pod := map[string]string{annotationPrefix + annIPAlgo: "namespace,pod"} + pod := map[string]string{annotationPrefix + annIPAlgo: "namespace,app"} node := map[string]string{annotationPrefix + annIPAlgo: "image"} got := ResolveIPAlgo(pod, node, nil) - want := []embed.Field{embed.FieldNamespace, embed.FieldPod} + want := []embed.Field{embed.FieldNamespace, embed.FieldApp} if !equalFields(got, want) { t.Fatalf("got %v, want %v", got, want) } @@ -210,9 +210,9 @@ func TestResolveIPAlgo_PodEmptyFallsToNode(t *testing.T) { func TestResolveIPAlgo_PodInvalidFallsToNode(t *testing.T) { for _, podVal := range []string{"namespace,bogus", "ns", ",", "namespace,namespace"} { pod := map[string]string{annotationPrefix + annIPAlgo: podVal} - node := map[string]string{annotationPrefix + annIPAlgo: "pod"} + node := map[string]string{annotationPrefix + annIPAlgo: "app"} got := ResolveIPAlgo(pod, node, nil) - want := []embed.Field{embed.FieldPod} + want := []embed.Field{embed.FieldApp} if !equalFields(got, want) { t.Fatalf("podVal=%q: got %v, want %v", podVal, got, want) } @@ -243,16 +243,16 @@ func TestResolveIPAlgo_NilNodeMap(t *testing.T) { } func TestResolveIPAlgo_Whitespace(t *testing.T) { - pod := 
map[string]string{annotationPrefix + annIPAlgo: " namespace , pod "} + pod := map[string]string{annotationPrefix + annIPAlgo: " namespace , app "} got := ResolveIPAlgo(pod, nil, nil) - want := []embed.Field{embed.FieldNamespace, embed.FieldPod} + want := []embed.Field{embed.FieldNamespace, embed.FieldApp} if !equalFields(got, want) { t.Fatalf("got %v, want %v", got, want) } } func TestResolveIPAlgo_DuplicateInvalidates(t *testing.T) { - pod := map[string]string{annotationPrefix + annIPAlgo: "pod,pod"} + pod := map[string]string{annotationPrefix + annIPAlgo: "app,app"} node := map[string]string{annotationPrefix + annIPAlgo: "namespace"} got := ResolveIPAlgo(pod, node, nil) want := []embed.Field{embed.FieldNamespace} diff --git a/pkg/agent/handlers.go b/pkg/agent/handlers.go index 4d1c021..619a6be 100644 --- a/pkg/agent/handlers.go +++ b/pkg/agent/handlers.go @@ -5,13 +5,89 @@ import ( "fmt" "log/slog" "net" + "strings" "time" flockcni "code.fritzlab.net/fritzlab/flock/pkg/cni" cnitypes "github.com/containernetworking/cni/pkg/types" current "github.com/containernetworking/cni/pkg/types/100" + corev1 "k8s.io/api/core/v1" ) +// podTemplateHashLabel is the well-known label Kubernetes attaches to +// every Pod owned by a ReplicaSet so the ReplicaSet name can be +// reconstructed as "<deployment>-<hash>". We use it to peel the hash back off +// in deriveAppName. +const podTemplateHashLabel = "pod-template-hash" + +// deriveAppName returns the stable workload identifier for a Pod — the +// name of the topmost stable controller, with the pod-template-hash +// stripped for ReplicaSet-owned pods. +// +// The rule maps to Kubernetes pod-name generation: +// +// Deployment → ReplicaSet → Pod pod owner is RS named "<deployment>-<hash>"; +// strip the trailing "-<hash>" to recover +// the Deployment name. +// StatefulSet → Pod pod owner is the STS itself; use as-is. +// DaemonSet → Pod pod owner is the DS itself; use as-is. +// Job → Pod pod owner is the Job itself; use as-is.
+// (bare pod) → Pod no controller owner; fall back to pod name. +// +// All replicas of the same workload converge on the same return value, +// which is the property the ip-algo `app` field needs. +func deriveAppName(pod *corev1.Pod) string { + owner := controllerOwner(pod) + if owner == nil { + return pod.Name + } + if owner.Kind == "ReplicaSet" { + if hash, ok := pod.Labels[podTemplateHashLabel]; ok && hash != "" { + suffix := "-" + hash + if strings.HasSuffix(owner.Name, suffix) { + return strings.TrimSuffix(owner.Name, suffix) + } + } + // Custom controller named the RS something that doesn't match + // the pod-template-hash convention. Falling back to the RS name + // keeps replicas of the same RS aligned, which is the second- + // best correctness we can offer. + return owner.Name + } + return owner.Name +} + +// controllerOwner returns the OwnerReference flagged with Controller=true, +// or nil if none. Kubernetes guarantees at most one controller per object. +func controllerOwner(pod *corev1.Pod) *metav1OwnerLite { + for i := range pod.OwnerReferences { + o := &pod.OwnerReferences[i] + if o.Controller != nil && *o.Controller { + return &metav1OwnerLite{Kind: o.Kind, Name: o.Name} + } + } + return nil +} + +// metav1OwnerLite is the slice of OwnerReference we actually consult, +// kept tiny so it can be returned by value-pointer cheaply. +type metav1OwnerLite struct { + Kind string + Name string +} + +// podImageRef returns a deterministic image reference for the embed +// `image` field. We use the first container's spec'd image — this is +// stable across replicas of the same Deployment without requiring the +// runtime-resolved digest. Empty string if the pod has no containers, +// in which case the embed package falls back to FNV(containerID). +func podImageRef(pod *corev1.Pod) string { + if len(pod.Spec.Containers) == 0 { + return "" + } + return pod.Spec.Containers[0].Image +} + // PodHandler is the platform-agnostic ADD/DEL/CHECK implementation. 
It // resolves the Pod from the informer cache, parses annotations, allocates // from IPAM, programs netns (or skips on non-Linux build), and persists @@ -68,11 +144,13 @@ func (h *PodHandler) Add(ctx context.Context, req flockcni.Request) (*current.Re ContainerID: req.ContainerID, Namespace: args.PodNamespace, Pod: args.PodName, + App: deriveAppName(pod), WantV6: parsed.WantV6, WantV4: parsed.WantV4, AnnCIDR6: parsed.CIDR6, AnnCIDR4: parsed.CIDR4, IPAlgo: ipAlgo, + Image: podImageRef(pod), } res, err := h.IPAM.Allocate(allocReq) if err != nil { diff --git a/pkg/agent/handlers_test.go b/pkg/agent/handlers_test.go new file mode 100644 index 0000000..9629fb5 --- /dev/null +++ b/pkg/agent/handlers_test.go @@ -0,0 +1,108 @@ +package agent + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func ptrBool(b bool) *bool { return &b } + +func mkPod(name string, owner *metav1.OwnerReference, labels map[string]string, image string) *corev1.Pod { + p := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name, Labels: labels}, + } + if owner != nil { + p.OwnerReferences = []metav1.OwnerReference{*owner} + } + if image != "" { + p.Spec.Containers = []corev1.Container{{Image: image}} + } + return p +} + +func TestDeriveAppName_DeploymentReplicaSet(t *testing.T) { + owner := &metav1.OwnerReference{ + Kind: "ReplicaSet", + Name: "traefik-789df685f", + Controller: ptrBool(true), + } + pod := mkPod("traefik-789df685f-hqvfl", owner, + map[string]string{podTemplateHashLabel: "789df685f"}, "") + if got := deriveAppName(pod); got != "traefik" { + t.Fatalf("got %q, want %q", got, "traefik") + } +} + +func TestDeriveAppName_StatefulSet(t *testing.T) { + owner := &metav1.OwnerReference{ + Kind: "StatefulSet", + Name: "gitea", + Controller: ptrBool(true), + } + pod := mkPod("gitea-0", owner, nil, "") + if got := deriveAppName(pod); got != "gitea" { + t.Fatalf("got %q, want %q", got, "gitea") + } +} + +func 
TestDeriveAppName_DaemonSet(t *testing.T) { + owner := &metav1.OwnerReference{ + Kind: "DaemonSet", + Name: "flock-agent", + Controller: ptrBool(true), + } + pod := mkPod("flock-agent-abcde", owner, nil, "") + if got := deriveAppName(pod); got != "flock-agent" { + t.Fatalf("got %q, want %q", got, "flock-agent") + } +} + +func TestDeriveAppName_BarePod(t *testing.T) { + pod := mkPod("standalone", nil, nil, "") + if got := deriveAppName(pod); got != "standalone" { + t.Fatalf("got %q, want %q", got, "standalone") + } +} + +// TestDeriveAppName_RSWithoutTemplateHash — ReplicaSet owners that don't +// follow the standard "<deployment>-<hash>" naming convention (e.g. a custom +// controller) keep the RS name as-is. All replicas of that RS still align, +// which is the second-best correctness offer. +func TestDeriveAppName_RSWithoutTemplateHash(t *testing.T) { + owner := &metav1.OwnerReference{ + Kind: "ReplicaSet", + Name: "weird-rs-name", + Controller: ptrBool(true), + } + pod := mkPod("weird-rs-name-xyz", owner, nil, "") + if got := deriveAppName(pod); got != "weird-rs-name" { + t.Fatalf("got %q, want %q", got, "weird-rs-name") + } +} + +func TestDeriveAppName_NonControllerOwnerIgnored(t *testing.T) { + // OwnerReference without Controller=true must be ignored — only the + // controller owner is the canonical workload. + owner := &metav1.OwnerReference{ + Kind: "Foo", + Name: "irrelevant", + // Controller pointer left nil.
+ } + pod := mkPod("solo", owner, nil, "") + if got := deriveAppName(pod); got != "solo" { + t.Fatalf("got %q, want %q", got, "solo") + } +} + +func TestPodImageRef(t *testing.T) { + pod := mkPod("p", nil, nil, "traefik:v3.5") + if got := podImageRef(pod); got != "traefik:v3.5" { + t.Fatalf("got %q, want %q", got, "traefik:v3.5") + } + empty := mkPod("p", nil, nil, "") + if got := podImageRef(empty); got != "" { + t.Fatalf("got %q, want \"\"", got) + } +} diff --git a/pkg/agent/ipam.go b/pkg/agent/ipam.go index 6b447e0..746eef0 100644 --- a/pkg/agent/ipam.go +++ b/pkg/agent/ipam.go @@ -67,7 +67,13 @@ func (cryptoRand) PickIndex(n int) int { type AllocRequest struct { ContainerID string Namespace string - Pod string + // Pod is the literal pod name (used for logging only — not embedded). + Pod string + // App is the stable workload identity for the FieldApp embed field — + // typically the owning Deployment / StatefulSet / DaemonSet name. + // Computed by the handler; falls back to Pod when no usable owner is + // found (bare pods). + App string // WantV6 / WantV4 are the post-merge address family selection (pod // annotation > NodeConfig.Spec.Defaults > built-in baseline of // dual-stack). At least one MUST be true; Allocate rejects the request @@ -78,12 +84,14 @@ type AllocRequest struct { // means "use any of the node's CIDRs". AnnCIDR6 []*net.IPNet AnnCIDR4 []*net.IPNet - // IPAlgo comes from the ip-algo annotation. Empty means random IID. + // IPAlgo comes from the resolved ip-algo precedence chain. Empty means + // random IID. IPAlgo []embed.Field - // ImageDigest is the sha256 manifest digest (with or without "sha256:" - // prefix). If empty, embed.Values.ImageFallback = ContainerID is used - // for ip-algo fields that reference image. - ImageDigest string + // Image is the spec'd image reference (typically + // pod.Spec.Containers[0].Image). When 64 hex chars, treated as a + // sha256 digest; otherwise FNV-1a-64'd as a string. 
Empty falls back + // to FNV(ContainerID) for ip-algo fields that reference image. + Image string } // AllocResult is what the IPAM hands back to the CNI ADD. @@ -210,8 +218,8 @@ func (i *IPAM) allocV6(cidr *net.IPNet, req AllocRequest) (net.IP, error) { } else { ip, err = embed.Embed(cidr, req.IPAlgo, embed.Values{ Namespace: req.Namespace, - Pod: req.Pod, - Image: req.ImageDigest, + App: req.App, + Image: req.Image, ImageFallback: req.ContainerID, }, i.randSrc.NibbleN()) } diff --git a/pkg/agent/ipam_test.go b/pkg/agent/ipam_test.go index 02eebec..1234ea7 100644 --- a/pkg/agent/ipam_test.go +++ b/pkg/agent/ipam_test.go @@ -148,8 +148,8 @@ func TestIPAM_AllocV6_WithEmbed(t *testing.T) { } i.randSrc = &fakeRand{nibbles: []byte{0xe}} res, err := i.Allocate(AllocRequest{ - ContainerID: "c1", Namespace: "mail", Pod: "stalwart-0", WantV6: true, - IPAlgo: []embed.Field{embed.FieldNamespace, embed.FieldPod, embed.FieldImage}, + ContainerID: "c1", Namespace: "mail", Pod: "stalwart-0", App: "stalwart", WantV6: true, + IPAlgo: []embed.Field{embed.FieldNamespace, embed.FieldApp, embed.FieldImage}, }) if err != nil { t.Fatalf("Allocate: %v", err) diff --git a/pkg/embed/suffix.go b/pkg/embed/suffix.go index 7d34cb0..3692093 100644 --- a/pkg/embed/suffix.go +++ b/pkg/embed/suffix.go @@ -1,5 +1,5 @@ -// Package embed implements ip-algo: deterministic embedding of pod identity -// (namespace, pod name, image digest) into the host portion of an IPv6 +// Package embed implements ip-algo: deterministic embedding of workload +// identity (namespace, app name, image) into the host portion of an IPv6 // address. The mapping is operator-friendly cosmetics — NOT a security // boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec. package embed @@ -17,18 +17,27 @@ type Field string const ( FieldNamespace Field = "namespace" - FieldPod Field = "pod" + FieldApp Field = "app" FieldImage Field = "image" ) -// Values carries the inputs for one embedding call. 
Image holds the SHA-256 -// manifest digest as 64 hex chars when known; otherwise pass the containerID -// in ImageFallback and we'll FNV-1a-64 it. +// Values carries the inputs for one embedding call. +// +// App is the stable workload identifier — typically the owning Deployment / +// StatefulSet / DaemonSet name (callers strip the pod-template-hash from +// ReplicaSet names before passing it in). Caller is responsible for picking +// the right level of stability; this package just hashes whatever it gets. +// +// Image is whatever string the caller wants embedded for the image field; +// the most common choice is pod.Spec.Containers[0].Image (the spec'd +// reference). If the caller passes a 64-hex-char SHA-256 digest, the top +// bits are taken as a hex value; otherwise it is FNV-1a-64'd as a plain +// string. ImageFallback is used only when Image == "". type Values struct { - Namespace string - Pod string - Image string // 64-char hex sha256 manifest digest, or empty - ImageFallback string // typically containerID, used when Image=="". + Namespace string + App string + Image string // sha256 hex (64 chars), or any string to FNV; empty → fallback + ImageFallback string // typically containerID, used when Image=="". } // MaxFieldNibbles is the largest single-field width supported by this @@ -127,13 +136,22 @@ func fieldValue(f Field, v Values, bits int) (uint64, error) { switch f { case FieldNamespace: return topBitsFNV(v.Namespace, bits), nil - case FieldPod: - return topBitsFNV(v.Pod, bits), nil + case FieldApp: + return topBitsFNV(v.App, bits), nil case FieldImage: - if v.Image != "" { + if v.Image == "" { + return topBitsFNV(v.ImageFallback, bits), nil + } + // SHA-256 manifest digests are exactly 64 hex chars (with optional + // "sha256:" prefix). Anything else — image:tag references like + // "traefik:v3", or short SHAs — gets FNV-1a-64'd as a string. 
This + // preserves the original digest behaviour while letting callers + // pass pod.Spec.Containers[0].Image directly. + s := strings.TrimPrefix(v.Image, "sha256:") + if len(s) == 64 && isHex(s) { return topBitsHex(v.Image, bits) } - return topBitsFNV(v.ImageFallback, bits), nil + return topBitsFNV(v.Image, bits), nil default: return 0, fmt.Errorf("unknown field %q", f) } @@ -163,6 +181,21 @@ func topBitsHex(s string, bits int) (uint64, error) { return v >> uint(64-bits), nil } +// isHex reports whether every byte in s is a valid hex digit. +func isHex(s string) bool { + for i := 0; i < len(s); i++ { + c := s[i] + switch { + case c >= '0' && c <= '9': + case c >= 'a' && c <= 'f': + case c >= 'A' && c <= 'F': + default: + return false + } + } + return true +} + // writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0). func writeNibble(addr net.IP, nibIdx int, nb byte) { bytePos := nibIdx / 2 diff --git a/pkg/embed/suffix_fuzz_test.go b/pkg/embed/suffix_fuzz_test.go index 3f3e9f3..774dac1 100644 --- a/pkg/embed/suffix_fuzz_test.go +++ b/pkg/embed/suffix_fuzz_test.go @@ -11,29 +11,30 @@ func FuzzEmbed(f *testing.F) { type seed struct { prefix string fields string // comma-separated, mapped below to []Field - ns, pod string + ns, app string image string fallback string nNibble byte } for _, s := range []seed{ - {"2602:817:3000:f001::/64", "namespace,pod,image", "mail", "stalwart-0", "", "ctr", 0xe}, - {"2001:db8::/64", "namespace", "ns", "p", "", "", 0}, - {"2001:db8::/96", "pod", "", "podname", "", "ctr", 0xf}, - {"2001:db8::/48", "namespace,pod", "ns", "p", "", "ctr", 0x1}, - {"2001:db8::/120", "namespace", "n", "p", "", "ctr", 0x0}, // 8 host nibbles - {"2001:db8::/124", "namespace", "n", "p", "", "ctr", 0x0}, // 4 host nibbles - {"2001:db8::/127", "namespace", "n", "p", "", "ctr", 0x0}, // not nibble-aligned - {"2001:db8::/63", "namespace", "n", "p", "", "ctr", 0x0}, // not nibble-aligned - {"2001:db8::/64", "namespace,pod,image", "", "", 
"sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011", "", 0xa}, - {"2001:db8::/64", "namespace,pod,image", "", "", "", "ctr", 0xa}, + {"2602:817:3000:f001::/64", "namespace,app,image", "mail", "stalwart", "", "ctr", 0xe}, + {"2001:db8::/64", "namespace", "ns", "a", "", "", 0}, + {"2001:db8::/96", "app", "", "appname", "", "ctr", 0xf}, + {"2001:db8::/48", "namespace,app", "ns", "a", "", "ctr", 0x1}, + {"2001:db8::/120", "namespace", "n", "a", "", "ctr", 0x0}, // 8 host nibbles + {"2001:db8::/124", "namespace", "n", "a", "", "ctr", 0x0}, // 4 host nibbles + {"2001:db8::/127", "namespace", "n", "a", "", "ctr", 0x0}, // not nibble-aligned + {"2001:db8::/63", "namespace", "n", "a", "", "ctr", 0x0}, // not nibble-aligned + {"2001:db8::/64", "namespace,app,image", "", "", "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011", "", 0xa}, + {"2001:db8::/64", "namespace,app,image", "", "", "traefik:v3.5", "ctr", 0xa}, + {"2001:db8::/64", "namespace,app,image", "", "", "", "ctr", 0xa}, {"2001:db8::/64", "namespace", "🦆", "🐧", "", "", 0}, - {"2001:db8::/64", "namespace", "ns\x00\x00", "p", "", "", 0}, + {"2001:db8::/64", "namespace", "ns\x00\x00", "a", "", "", 0}, } { - f.Add(s.prefix, s.fields, s.ns, s.pod, s.image, s.fallback, s.nNibble) + f.Add(s.prefix, s.fields, s.ns, s.app, s.image, s.fallback, s.nNibble) } - f.Fuzz(func(t *testing.T, prefix, fieldsStr, ns, pod, image, fallback string, nNibble byte) { + f.Fuzz(func(t *testing.T, prefix, fieldsStr, ns, app, image, fallback string, nNibble byte) { _, network, err := net.ParseCIDR(prefix) if err != nil { return @@ -44,7 +45,7 @@ func FuzzEmbed(f *testing.F) { } got, err := Embed(network, fields, Values{ Namespace: ns, - Pod: pod, + App: app, Image: image, ImageFallback: fallback, }, nNibble) @@ -74,8 +75,8 @@ func decodeFields(s string) ([]Field, bool) { switch string(cur) { case string(FieldNamespace): out = append(out, FieldNamespace) - case string(FieldPod): - out = append(out, 
FieldPod) + case string(FieldApp): + out = append(out, FieldApp) case string(FieldImage): out = append(out, FieldImage) default: diff --git a/pkg/embed/suffix_test.go b/pkg/embed/suffix_test.go index 6106f36..95791df 100644 --- a/pkg/embed/suffix_test.go +++ b/pkg/embed/suffix_test.go @@ -70,8 +70,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) { // /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID. net64 := mustCIDR(t, "2602:817:3000:f001::/64") addr, err := Embed(net64, - []Field{FieldNamespace, FieldPod, FieldImage}, - Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"}, + []Field{FieldNamespace, FieldApp, FieldImage}, + Values{Namespace: "mail", App: "stalwart", ImageFallback: "container-abc"}, 0xe, ) if err != nil { @@ -79,8 +79,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) { } // Property: same inputs → same output (twice). addr2, err := Embed(net64, - []Field{FieldNamespace, FieldPod, FieldImage}, - Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"}, + []Field{FieldNamespace, FieldApp, FieldImage}, + Values{Namespace: "mail", App: "stalwart", ImageFallback: "container-abc"}, 0xe, ) if err != nil { @@ -101,8 +101,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) { func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) { net64 := mustCIDR(t, "2602:817:3000:f001::/64") - a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0) - b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0) + a, _ := Embed(net64, []Field{FieldNamespace, FieldApp}, Values{Namespace: "ns1", App: "p1"}, 0) + b, _ := Embed(net64, []Field{FieldNamespace, FieldApp}, Values{Namespace: "ns2", App: "p1"}, 0) if a.Equal(b) { t.Fatalf("different namespace produced identical IID: %s", a) }