Files
flock/pkg/agent/annotations.go
T
Donavan Fritz a17d33e182
Build flock Image / build (push) Successful in 5m27s
agent: addresses annotation replaces IPAM allocation
When flock.fritzlab.net/addresses provides a v6 or v4, the IP becomes
the pod's primary IP for that family — bound to eth0, default route off
it, on-link host route via setHostRoute, and a per-pod /128 or /32 in
BGP. IPAM no longer allocates a private IP alongside it. The pod ends up
with exactly the operator-supplied addresses on eth0 (plus any extras
beyond the first-of-family, which keep the pre-existing layered
behavior).

This is the fix the original addresses-annotation work missed: bug #1
allocated a private IP next to the public one (so VPN-routed clients
could land on the private path on Plex). Promoting addresses-supplied
IPs into the IPAM-style routing slot keeps the public IP as the only
primary IP visible from outside.

Three pieces:
- annotations.go: reject pods whose addresses/anycast IP family is
  disabled (ipv6/ipv4 annotation or NodeConfig default). Both annotation
  types rely on the family being enabled for return-path routing.
- handlers.go: peel first v6 + first v4 from Addresses into res.IP6/IP4;
  suppress IPAM for those families; skip IPAM call entirely if both
  families are addresses-supplied.
- anycast_linux.go: extend renderBird to advertise any IPAM IP that's
  outside the node's BGP aggregate as a per-pod /32 or /128. This is
  what makes 142.202.202.166 reachable when host004's pod CIDR is
  172.25.214.0/24 — the addresses-promoted IP isn't covered by the
  aggregate.

Tests: 7 new annotation tests covering the conflict cases (ipv4=false +
addresses-v4, NodeConfig default + addresses-v4, etc.) plus 5 unit tests
for the splitAddressesPrimary helper.

README updated with the addresses-replaces-IPAM behavior, the
addresses-vs-anycast comparison, the conflict rule, and a Plex-style
example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 09:46:48 -05:00

406 lines
13 KiB
Go

package agent
import (
"fmt"
"log/slog"
"net"
"strings"
flockv1alpha1 "code.fritzlab.net/fritzlab/flock/pkg/api/v1alpha1"
"code.fritzlab.net/fritzlab/flock/pkg/embed"
)
// annotationPrefix is the namespace under which all flock pod annotations
// live. Every lookup in this file is keyed as annotationPrefix + <key>, so
// anything not starting with this prefix is ignored by the parser.
const annotationPrefix = "flock.fritzlab.net/"
// Recognised annotation keys (without the prefix). The full annotation name
// is annotationPrefix + key, e.g. "flock.fritzlab.net/ipv6".
const (
// annIPv6 and annIPv4 are the per-family on/off toggles; ParseAnnotations
// accepts only "true" or "false" for them.
annIPv6 = "ipv6"
annIPv4 = "ipv4"
// annCIDR6 and annCIDR4 carry comma-separated CIDR lists that
// ParseAnnotations checks against the matching address family.
annCIDR6 = "cidr6"
annCIDR4 = "cidr4"
// annIPAlgo carries a comma-separated field list; it is read by
// ResolveIPAlgo, not by ParseAnnotations.
annIPAlgo = "ip-algo"
// annAnycast and annAddresses carry comma-separated literal IP lists;
// entries of both families may be mixed in one list.
annAnycast = "anycast"
annAddresses = "addresses"
)
// FamilyDefaults is the per-call baseline for whether a pod receives an IPv6
// and/or IPv4 address. It is the merge of:
//
//  1. flock's built-in baseline (IPv6=true, IPv4=true — dual-stack), then
//  2. any NodeConfig.Spec.Defaults override the operator has applied to
//     the local node.
//
// Pod-level `flock.fritzlab.net/ipv{6,4}` annotations override this baseline;
// ParseAnnotations seeds its result from these two fields before applying
// them.
//
// Use FamilyDefaultsFromNodeConfig to compute a value from a NodeConfig,
// or BuiltinFamilyDefaults() if no NodeConfig is in scope.
type FamilyDefaults struct {
// WantV6 is the default-on value for IPv6 inclusion when the pod has no
// explicit ipv6 annotation.
WantV6 bool
// WantV4 is the default-on value for IPv4 inclusion when the pod has no
// explicit ipv4 annotation.
WantV4 bool
}
// BuiltinFamilyDefaults returns flock's hard-coded fallback policy:
// dual-stack, i.e. both IPv6 and IPv4 enabled. It applies whenever no
// NodeConfig override is in effect. A pod narrows it with the
// `flock.fritzlab.net/ipv6` or `flock.fritzlab.net/ipv4` annotation; an
// operator narrows it per node through NodeConfig.Spec.Defaults.
//
// It is a function rather than a package-level var so every caller gets a
// fresh value and nobody can mutate a shared baseline at runtime.
func BuiltinFamilyDefaults() FamilyDefaults {
	var baseline FamilyDefaults
	baseline.WantV6 = true
	baseline.WantV4 = true
	return baseline
}
// FamilyDefaultsFromNodeConfig resolves the effective per-node defaults.
// It starts from BuiltinFamilyDefaults and overwrites only the fields that
// nc.Spec.Defaults sets explicitly (non-nil IPv6 / IPv4 pointers). A nil
// NodeConfig, or one with a nil Spec.Defaults, yields the built-in baseline
// unchanged.
func FamilyDefaultsFromNodeConfig(nc *flockv1alpha1.NodeConfig) FamilyDefaults {
	resolved := BuiltinFamilyDefaults()
	if nc != nil && nc.Spec.Defaults != nil {
		overrides := nc.Spec.Defaults
		if v6 := overrides.IPv6; v6 != nil {
			resolved.WantV6 = *v6
		}
		if v4 := overrides.IPv4; v4 != nil {
			resolved.WantV4 = *v4
		}
	}
	return resolved
}
// ParsedAnnotations is the typed view of a pod's flock annotations after the
// node-level defaults have been merged in. All slices are non-nil only when
// the corresponding annotation was present and parsed cleanly (the list
// parsers reject a list that is empty after trimming).
type ParsedAnnotations struct {
// WantV6 is true when the pod should receive an IPv6 address.
WantV6 bool
// WantV4 is true when the pod should receive an IPv4 address.
WantV4 bool
// CIDR6 narrows IPv6 allocation to specific operator-approved sub-ranges
// of the node's CIDR6 set. nil/empty means "use any node CIDR6".
CIDR6 []*net.IPNet
// CIDR4 narrows IPv4 allocation. nil/empty means "use any node CIDR4".
CIDR4 []*net.IPNet
// Anycast is the set of anycast IPs to bind on the pod's loopback.
// nil/empty means "no anycast".
Anycast []net.IP
// Addresses is the set of operator-supplied IPs, in annotation order, to
// bind directly on the pod's eth0. BGP advertisement (/128+/32) is
// identical to Anycast; the only difference is that these IPs land on the
// primary interface instead of lo. Use this when the workload needs the IP
// directly visible on eth0 (e.g. Plex, which inspects its own interfaces
// for remote-access setup). nil/empty means the annotation was absent.
// NOTE(review): consumers outside this file reportedly promote the first
// IP of each family to the pod's primary address in place of an IPAM
// allocation, so not every entry is an "extra" address — confirm against
// the ADD handler.
Addresses []net.IP
}
// ParseAnnotations merges a pod's flock annotations over the supplied
// per-node defaults and validates the result. It is pure — it reads only its
// arguments, never NodeConfig or any global state — so it is safe to call
// from tests and fuzz targets.
//
// Precedence: pod annotation > FamilyDefaults > built-in baseline. Callers
// compute FamilyDefaults via FamilyDefaultsFromNodeConfig and pass it in.
//
// Validation runs in a fixed order and stops at the first failure, returning
// a nil result together with the error:
//   - an ipv6/ipv4 value other than "true" or "false" (case-insensitive)
//   - the merged toggles enable neither IPv6 nor IPv4 (a pod must have at
//     least one address family)
//   - a malformed or wrong-family cidr6/cidr4 list
//   - a malformed anycast or addresses list
//   - an addresses or anycast IP whose family is disabled after the merge
//
// The ip-algo annotation is not handled here; see ResolveIPAlgo.
func ParseAnnotations(in map[string]string, defaults FamilyDefaults) (*ParsedAnnotations, error) {
	parsed := &ParsedAnnotations{WantV6: defaults.WantV6, WantV4: defaults.WantV4}

	// Family toggles: an explicit pod annotation replaces the node default.
	toggles := []struct {
		key    string
		target *bool
	}{
		{annIPv6, &parsed.WantV6},
		{annIPv4, &parsed.WantV4},
	}
	for _, tg := range toggles {
		raw, present := in[annotationPrefix+tg.key]
		if !present {
			continue
		}
		enabled, err := parseBoolAnnotation(tg.key, raw)
		if err != nil {
			return nil, err
		}
		*tg.target = enabled
	}
	if !(parsed.WantV6 || parsed.WantV4) {
		return nil, fmt.Errorf("annotations + defaults resolve to no address family (need at least one of ipv6/ipv4)")
	}

	// Allocation ranges, each validated against its own address family.
	ranges := []struct {
		key    string
		family addressFamily
		target *[]*net.IPNet
	}{
		{annCIDR6, familyV6, &parsed.CIDR6},
		{annCIDR4, familyV4, &parsed.CIDR4},
	}
	for _, rg := range ranges {
		raw, present := in[annotationPrefix+rg.key]
		if !present {
			continue
		}
		nets, err := parseCIDRList(raw, rg.family)
		if err != nil {
			return nil, fmt.Errorf("annotation %s: %w", rg.key, err)
		}
		*rg.target = nets
	}

	// Literal IP lists; both families may appear in either list.
	lists := []struct {
		key    string
		target *[]net.IP
	}{
		{annAnycast, &parsed.Anycast},
		{annAddresses, &parsed.Addresses},
	}
	for _, ls := range lists {
		raw, present := in[annotationPrefix+ls.key]
		if !present {
			continue
		}
		ips, err := parseIPList(raw)
		if err != nil {
			return nil, fmt.Errorf("annotation %s: %w", ls.key, err)
		}
		*ls.target = ips
	}

	// Reject pods that ask for an addresses- or anycast-supplied IP whose
	// family was disabled (via the pod's ipv6/ipv4 annotation or NodeConfig
	// default). Both annotation types put the IP on a pod interface and rely
	// on the family being enabled for return-path routing, so silently
	// accepting the IP would leave a non-functional pod. Fail closed instead.
	// Addresses are checked before anycast so the first reported conflict is
	// stable.
	conflicts := []struct {
		source string
		ips    []net.IP
	}{
		{annAddresses, parsed.Addresses},
		{annAnycast, parsed.Anycast},
	}
	for _, cf := range conflicts {
		for _, ip := range cf.ips {
			if err := requireFamilyEnabled(ip, parsed.WantV6, parsed.WantV4, cf.source); err != nil {
				return nil, err
			}
		}
	}
	return parsed, nil
}
// requireFamilyEnabled reports an error when ip belongs to an address family
// that the resolved wantV6/wantV4 flags have switched off (pod annotation >
// NodeConfig default > built-in dual-stack). An IP counts as IPv4 whenever
// ip.To4() is non-nil, which includes IPv4-mapped IPv6 forms; everything
// else is treated as IPv6. source names the annotation that supplied the IP
// so the operator-facing message points at the right place.
func requireFamilyEnabled(ip net.IP, wantV6, wantV4 bool, source string) error {
	isV4 := ip.To4() != nil
	switch {
	case isV4 && !wantV4:
		return fmt.Errorf("annotation %s: contains IPv4 %s but ipv4 is disabled (annotation or NodeConfig default)", source, ip)
	case !isV4 && !wantV6:
		return fmt.Errorf("annotation %s: contains IPv6 %s but ipv6 is disabled (annotation or NodeConfig default)", source, ip)
	default:
		return nil
	}
}
// parseBoolAnnotation converts an annotation value to a bool. Only "true"
// and "false" are accepted, compared after trimming surrounding whitespace
// and lower-casing. Every other spelling — including "1", "0", "yes", "no"
// and the empty string — is rejected, so operator typos surface as errors
// instead of quietly becoming false. key is used only to build the error
// message, which quotes the value exactly as supplied.
func parseBoolAnnotation(key, v string) (bool, error) {
	normalized := strings.ToLower(strings.TrimSpace(v))
	if normalized == "true" {
		return true, nil
	}
	if normalized == "false" {
		return false, nil
	}
	return false, fmt.Errorf("annotation %s=%q: must be \"true\" or \"false\"", key, v)
}
// addressFamily distinguishes IPv6 vs IPv4 in places where the parser must
// validate the family of supplied CIDRs.
type addressFamily int
const (
// familyAny is the zero value; parseCIDRList performs no family check
// when asked for it.
familyAny addressFamily = iota
// familyV6 requires every entry to be an IPv6 prefix.
familyV6
// familyV4 requires every entry to be an IPv4 prefix.
familyV4
)
// parseCIDRList parses a comma-separated list of CIDR prefixes. Each item is
// trimmed of surrounding whitespace and blank items are skipped; a list with
// no remaining entries is an error. Entries are returned in input order as
// the network value produced by net.ParseCIDR.
//
// When want is familyV6 or familyV4, every entry must belong to that family
// (decided by whether the network address has a 4-byte form) and the first
// mismatch is reported, so a `flock.fritzlab.net/cidr6` annotation cannot
// silently slip a v4 prefix into the v6 allocator. Any other want value
// disables the family check.
func parseCIDRList(s string, want addressFamily) ([]*net.IPNet, error) {
	var nets []*net.IPNet
	for _, raw := range strings.Split(s, ",") {
		entry := strings.TrimSpace(raw)
		if entry == "" {
			continue
		}
		_, prefix, err := net.ParseCIDR(entry)
		if err != nil {
			return nil, fmt.Errorf("invalid CIDR %q: %w", entry, err)
		}
		v4 := prefix.IP.To4() != nil
		if want == familyV6 && v4 {
			return nil, fmt.Errorf("CIDR %q is IPv4, expected IPv6", entry)
		}
		if want == familyV4 && !v4 {
			return nil, fmt.Errorf("CIDR %q is IPv6, expected IPv4", entry)
		}
		nets = append(nets, prefix)
	}
	if len(nets) > 0 {
		return nets, nil
	}
	return nil, fmt.Errorf("empty CIDR list")
}
// parseIPList parses a comma-separated list of literal IP addresses, as used
// by both the anycast and addresses annotations. Items are trimmed and blank
// items skipped, exactly as in parseCIDRList; a list with no remaining
// entries is an error. IPv4 and IPv6 entries may be mixed freely, and the
// result preserves input order.
func parseIPList(s string) ([]net.IP, error) {
	var ips []net.IP
	for _, raw := range strings.Split(s, ",") {
		if entry := strings.TrimSpace(raw); entry != "" {
			addr := net.ParseIP(entry)
			if addr == nil {
				return nil, fmt.Errorf("invalid IP %q", entry)
			}
			ips = append(ips, addr)
		}
	}
	if len(ips) > 0 {
		return ips, nil
	}
	return nil, fmt.Errorf("empty IP list")
}
// ResolveIPAlgo picks the effective ip-algo field list for a pod by trying
// each source in precedence order:
//
//	pod annotation → NodeConfig annotation → nil (random IID).
//
// A source is skipped when its ip-algo key is missing, empty, or invalid,
// and the next source is consulted. "Invalid" means tryParseIPAlgo returned
// nil: an unrecognised token, a duplicate field, or a list that has no
// fields left after trimming. A present but unusable non-empty value is
// reported through log before falling through; a nil log disables the
// warning. A nil result means "no algo, generate a fully random IID".
func ResolveIPAlgo(podAnn, nodeAnn map[string]string, log *slog.Logger) []embed.Field {
	const key = annotationPrefix + annIPAlgo
	sources := [...]struct {
		name string
		ann  map[string]string
	}{
		{"pod", podAnn},
		{"NodeConfig", nodeAnn},
	}
	for _, src := range sources {
		raw, present := src.ann[key]
		if !present {
			continue
		}
		if parsed := tryParseIPAlgo(raw); parsed != nil {
			return parsed
		}
		warnIPAlgo(log, src.name, raw)
	}
	return nil
}
// warnIPAlgo emits one warning for an ip-algo annotation that was present
// but could not be parsed. source labels where the value came from and is
// logged alongside the raw value. Nothing is logged when log is nil, or when
// the value is blank after trimming: a blank value is treated the same as an
// absent key by design, so only a non-empty value that failed to parse
// deserves a warning.
func warnIPAlgo(log *slog.Logger, source, value string) {
	if log != nil && strings.TrimSpace(value) != "" {
		log.Warn("ignoring invalid ip-algo annotation; falling through",
			"source", source, "value", value)
	}
}
// tryParseIPAlgo parses an ip-algo annotation value under the relaxed
// "invalid → unset" rules. The value is a comma-separated list of tokens;
// each token is trimmed, blank tokens are skipped, and every remaining token
// must equal the string form of embed.FieldNamespace, embed.FieldApp or
// embed.FieldImage. The result lists the fields in the order given.
//
// nil is returned for empty input, any unrecognised token, any repeated
// field, or a list with no fields left after trimming. Duplicates collapse
// to nil rather than being de-duplicated so the operator notices the
// malformed annotation through the caller's warning instead of silently
// losing a field they thought they had specified.
func tryParseIPAlgo(s string) []embed.Field {
	known := [...]embed.Field{embed.FieldNamespace, embed.FieldApp, embed.FieldImage}
	used := make(map[embed.Field]struct{}, len(known))

	var fields []embed.Field
	for _, raw := range strings.Split(s, ",") {
		token := strings.TrimSpace(raw)
		if token == "" {
			continue
		}

		// Map the token onto one of the known fields; first match wins.
		var field embed.Field
		recognised := false
		for _, candidate := range known {
			if token == string(candidate) {
				field, recognised = candidate, true
				break
			}
		}
		if !recognised {
			return nil
		}

		if _, repeated := used[field]; repeated {
			return nil
		}
		used[field] = struct{}{}
		fields = append(fields, field)
	}
	if len(fields) == 0 {
		return nil
	}
	return fields
}
// CNIArgs is the typed view of the K=V;K=V CNI_ARGS string passed by kubelet.
// We only keep the fields the agent uses; unknown keys are ignored. Fields
// whose key never appears in the input stay empty.
type CNIArgs struct {
// PodNamespace is the value of K8S_POD_NAMESPACE.
PodNamespace string
// PodName is the value of K8S_POD_NAME.
PodName string
// PodUID is the value of K8S_POD_UID.
PodUID string
// InfraID is the value of K8S_POD_INFRA_CONTAINER_ID.
InfraID string
}
// ParseCNIArgs extracts the pod identity keys from a ";"-separated K=V
// string. It is permissive by design — kubelet versions and runtime shims
// pass varying sets of keys — so entries without an "=" and entries with an
// unrecognised key are skipped silently rather than failing the whole ADD.
// Each entry is split at its first "=", so values may themselves contain
// "="; keys are matched exactly, with no trimming. If a key repeats, the
// last occurrence wins. Checking that required keys are present is the
// caller's responsibility.
func ParseCNIArgs(s string) CNIArgs {
	var args CNIArgs
	for _, pair := range strings.Split(s, ";") {
		key, value, found := strings.Cut(pair, "=")
		if !found {
			continue
		}
		switch key {
		case "K8S_POD_NAMESPACE":
			args.PodNamespace = value
		case "K8S_POD_NAME":
			args.PodName = value
		case "K8S_POD_UID":
			args.PodUID = value
		case "K8S_POD_INFRA_CONTAINER_ID":
			args.InfraID = value
		}
	}
	return args
}