// flock/pkg/agent/netpol/translator.go

package netpol

import (
	"fmt"
	"net"
	"sort"

	netv1 "k8s.io/api/networking/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
)

// Inputs is the world-view the translator consumes. All fields are owned
// by the caller; the translator does not mutate them (the policy slice is
// copied before sorting and pod IP slices are copied into the output).
type Inputs struct {
	// LocalPods are the pods scheduled on this node that have a committed
	// flock allocation. Only these pods get rules — peers may live
	// elsewhere. A local pod with an empty IPs list is skipped entirely:
	// it produces no rules, no isolation entry, and no Pods entry.
	LocalPods []Pod

	// PeerPods is the cluster-wide pod set used to resolve podSelector +
	// namespaceSelector peers. It is fine to include the local pods here
	// too.
	// NOTE(review): the intended contract is that duplicates are deduped
	// by (namespace, name); confirm the indexing step enforces this.
	PeerPods []PeerPod

	// Namespaces is the cluster's full Namespace set. Used for
	// namespaceSelector matching; namespaces are looked up by Name, so a
	// later entry with the same Name replaces an earlier one.
	Namespaces []Namespace

	// Policies is every NetworkPolicy in the cluster. The translator
	// filters down to those that select at least one local pod: a policy
	// applies to a pod only when it lives in the pod's namespace and its
	// podSelector matches the pod's labels.
	Policies []netv1.NetworkPolicy
}

// Output is the result of one translation pass.
type Output struct {
	// Rules is the flat ordered list of allow rules to render. The
	// renderer groups them by (PodKey, Direction) into chains. Rules are
	// appended pod by pod (in LocalPods order), then policy by policy (in
	// sorted order), with a policy's ingress rules before its egress
	// rules.
	Rules []Rule

	// Isolated is the set of (PodKey, Direction) pairs whose chain must
	// have a default-deny policy. A pod selected by at least one policy
	// in a given direction shows up here. The renderer uses this to
	// decide whether to emit a chain at all and what its base policy is.
	Isolated map[Isolation]struct{}

	// Pods carries the HostIface + IPs for every local pod referenced
	// by the policy world, including pods that produced only isolation
	// (default-deny) without any allow rules. The renderer needs this
	// because such a pod has no Rule to lift the HostIface from. The IPs
	// stored here are a copy of the input pod's IP slice.
	Pods map[string]LocalPod // key = namespace/name
}

// Isolation is the (PodKey, Direction) key of the Isolated map. It holds
// only comparable fields so it can be used directly as a map key.
type Isolation struct {
	PodKey    string    // "namespace/name" of the isolated local pod
	Direction Direction // direction being isolated (DirIngress or DirEgress)
}

// Translate performs one translation pass over in and returns the rules,
// isolation set and pod metadata the renderer needs.
//
// For every local pod that has at least one IP, each policy in the pod's
// namespace whose podSelector matches the pod is expanded: the pod is
// recorded as isolated in the directions the policy covers, and every
// ingress / egress rule of the policy is turned into allow Rules. Policies
// are visited in the order produced by canonicalisePolicies, so Rules
// within a chain follow that sorted order.
//
// A policy or sub-rule that cannot be translated is reported through warn
// with a human-readable message and skipped, so one broken policy cannot
// take down enforcement for the whole node. warn may be nil, in which case
// such messages are dropped. The returned error is reserved for
// unrecoverable malformed input; this implementation always returns nil.
func Translate(in Inputs, warn func(string)) (Output, error) {
	if warn == nil {
		warn = func(string) {}
	}

	result := Output{
		Isolated: map[Isolation]struct{}{},
		Pods:     map[string]LocalPod{},
	}
	sorted := canonicalisePolicies(in.Policies)
	namespaces := indexNamespaces(in.Namespaces)
	peersByNS := indexPeerPods(in.PeerPods)

	for _, pod := range in.LocalPods {
		// A pod without IPs has no allocation yet; nothing to enforce.
		if len(pod.IPs) == 0 {
			continue
		}
		podKey := pod.Namespace + "/" + pod.Name

		for idx := range sorted {
			pol := &sorted[idx]

			// A NetworkPolicy only ever selects pods in its own namespace.
			if pol.Namespace != pod.Namespace {
				continue
			}
			selector, err := metav1.LabelSelectorAsSelector(&pol.Spec.PodSelector)
			if err != nil {
				warn(fmt.Sprintf("policy %s/%s: invalid podSelector: %v", pol.Namespace, pol.Name, err))
				continue
			}
			if !selector.Matches(labels.Set(pod.Labels)) {
				continue
			}

			wantIngress, wantEgress := policyDirections(pol)
			if !wantIngress && !wantEgress {
				continue
			}

			// Record the pod even if no allow rule survives below, so the
			// renderer can still find its interface for default-deny.
			result.Pods[podKey] = LocalPod{
				PodKey:    podKey,
				HostIface: pod.HostIface,
				IPs:       append([]net.IP(nil), pod.IPs...),
			}

			if wantIngress {
				result.Isolated[Isolation{PodKey: podKey, Direction: DirIngress}] = struct{}{}
				for n, rule := range pol.Spec.Ingress {
					built, err := buildIngressRules(pod, rule, pol.Namespace, namespaces, peersByNS)
					if err != nil {
						warn(fmt.Sprintf("policy %s/%s ingress[%d]: %v", pol.Namespace, pol.Name, n, err))
						continue
					}
					result.Rules = append(result.Rules, built...)
				}
			}

			if wantEgress {
				result.Isolated[Isolation{PodKey: podKey, Direction: DirEgress}] = struct{}{}
				for n, rule := range pol.Spec.Egress {
					built, err := buildEgressRules(pod, rule, pol.Namespace, namespaces, peersByNS)
					if err != nil {
						warn(fmt.Sprintf("policy %s/%s egress[%d]: %v", pol.Namespace, pol.Name, n, err))
						continue
					}
					result.Rules = append(result.Rules, built...)
				}
			}
		}
	}
	return result, nil
}

// policyDirections reports which traffic directions p isolates.
//
// When Spec.PolicyTypes is non-empty it is authoritative: a direction is
// isolated exactly when its type is listed (unknown types are ignored).
// When it is empty the directions are inferred: ingress is always
// isolated, egress only if the policy carries at least one egress rule.
func policyDirections(p *netv1.NetworkPolicy) (ingress, egress bool) {
	declared := p.Spec.PolicyTypes
	if len(declared) == 0 {
		return true, len(p.Spec.Egress) > 0
	}
	for _, kind := range declared {
		if kind == netv1.PolicyTypeIngress {
			ingress = true
		} else if kind == netv1.PolicyTypeEgress {
			egress = true
		}
	}
	return ingress, egress
}

// buildIngressRules turns a single NetworkPolicyIngressRule into the
// Rule(s) that allow the described traffic into pod. The rule's Ports are
// translated first, then its From peers; the first failure aborts the
// whole rule and is returned to the caller. The resulting Rule carries
// the complete port filter of the source rule.
func buildIngressRules(
	pod Pod,
	r netv1.NetworkPolicyIngressRule,
	policyNS string,
	nsByName map[string]Namespace,
	peerPodsByNS map[string][]PeerPod,
) ([]Rule, error) {
	var (
		portMatches []PortMatch
		sources     peerSet
		err         error
	)
	if portMatches, err = translatePorts(r.Ports); err != nil {
		return nil, err
	}
	if sources, err = translatePeers(r.From, policyNS, nsByName, peerPodsByNS); err != nil {
		return nil, err
	}
	return assembleRules(pod, DirIngress, sources, portMatches), nil
}

// buildEgressRules turns a single NetworkPolicyEgressRule into the
// Rule(s) that allow the described traffic out of pod. It mirrors
// buildIngressRules: Ports are translated first, then the To peers, and
// the first failure aborts the whole rule.
func buildEgressRules(
	pod Pod,
	r netv1.NetworkPolicyEgressRule,
	policyNS string,
	nsByName map[string]Namespace,
	peerPodsByNS map[string][]PeerPod,
) ([]Rule, error) {
	var (
		portMatches  []PortMatch
		destinations peerSet
		err          error
	)
	if portMatches, err = translatePorts(r.Ports); err != nil {
		return nil, err
	}
	if destinations, err = translatePeers(r.To, policyNS, nsByName, peerPodsByNS); err != nil {
		return nil, err
	}
	return assembleRules(pod, DirEgress, destinations, portMatches), nil
}

// peerSet is the resolved peer information for one rule's From / To list.
//
// NOTE(review): Except is pooled across every peer of the rule, so the
// link between an except entry and the ipBlock it came from is lost. If
// the renderer subtracts PeerExcept from all PeerCIDRs, an except from one
// ipBlock would also exclude addresses contributed by other ipBlocks or by
// pod / namespace selectors in the same rule — confirm against the
// renderer, since NetworkPolicy peers are meant to be OR-ed independently.
type peerSet struct {
	// allowAll is true when the rule has no peers at all (an empty From /
	// To list, which the spec defines as "from anywhere"). It overrides
	// CIDRs and Except: neither is copied into the emitted Rule.
	allowAll bool
	// CIDRs is the union of every IP / CIDR contributed by the rule's
	// peer entries (resolved Pod IPs, namespace pods, and ipBlock.cidr).
	CIDRs []*net.IPNet
	// Except is the union of every ipBlock.except entry across the rule.
	Except []*net.IPNet
}

// translatePeers resolves the From / To list of one rule into a peerSet.
//
// An empty list yields an allow-all peerSet. Otherwise each entry is
// handled by kind:
//
//   - ipBlock: the cidr is parsed into CIDRs and every except entry is
//     parsed into Except. ipBlock wins if selectors are set as well.
//   - podSelector and/or namespaceSelector: the matching peer pods are
//     resolved to host CIDRs and added to CIDRs.
//   - nothing set: the entry is rejected.
//
// Any parse or resolution failure aborts the whole list and returns an
// error prefixed with the index of the offending peer.
//
// NOTE(review): except entries from all ipBlocks end up in one shared
// list; whether that over-restricts other peers of the same rule depends
// on how the renderer applies PeerExcept — confirm.
func translatePeers(
	peers []netv1.NetworkPolicyPeer,
	policyNS string,
	nsByName map[string]Namespace,
	peerPodsByNS map[string][]PeerPod,
) (peerSet, error) {
	if len(peers) == 0 {
		return peerSet{allowAll: true}, nil
	}

	var resolved peerSet
	for idx := range peers {
		peer := peers[idx]

		if block := peer.IPBlock; block != nil {
			_, network, err := net.ParseCIDR(block.CIDR)
			if err != nil {
				return peerSet{}, fmt.Errorf("peer[%d] ipBlock.cidr %q: %w", idx, block.CIDR, err)
			}
			resolved.CIDRs = append(resolved.CIDRs, network)

			for exIdx, raw := range block.Except {
				_, excluded, err := net.ParseCIDR(raw)
				if err != nil {
					return peerSet{}, fmt.Errorf("peer[%d] ipBlock.except[%d] %q: %w", idx, exIdx, raw, err)
				}
				resolved.Except = append(resolved.Except, excluded)
			}
			continue
		}

		if peer.PodSelector == nil && peer.NamespaceSelector == nil {
			return peerSet{}, fmt.Errorf("peer[%d] is empty (must set ipBlock, podSelector, or namespaceSelector)", idx)
		}

		hosts, err := resolvePodNamespacePeer(peer, policyNS, nsByName, peerPodsByNS)
		if err != nil {
			return peerSet{}, fmt.Errorf("peer[%d]: %w", idx, err)
		}
		resolved.CIDRs = append(resolved.CIDRs, hosts...)
	}
	return resolved, nil
}

// resolvePodNamespacePeer walks the cluster's peer-pod set and returns
// /128 (v6) and /32 (v4) CIDRs for each pod that matches the (possibly
// combined) pod + namespace selectors.
//
// Selector semantics from the NetworkPolicy spec:
//
//   - podSelector + namespaceSelector both nil → handled upstream.
//   - podSelector set, namespaceSelector nil → match in the policy's
//     own namespace.
//   - podSelector nil, namespaceSelector set → match every pod in
//     namespaces that match the namespaceSelector.
//   - both set → AND: pod must be in a matching namespace AND match
//     the podSelector.
//
// An empty (non-nil) selector matches everything in scope.
//
// The result is ordered deterministically: matching namespaces are
// visited in ascending name order, pods in the order they appear in
// peerPodsByNS, and each pod's IPs in their stored order. An error is
// returned only when one of the selectors cannot be converted.
func resolvePodNamespacePeer(
	p netv1.NetworkPolicyPeer,
	policyNS string,
	nsByName map[string]Namespace,
	peerPodsByNS map[string][]PeerPod,
) ([]*net.IPNet, error) {
	var podSel, nsSel labels.Selector
	if p.PodSelector != nil {
		s, err := metav1.LabelSelectorAsSelector(p.PodSelector)
		if err != nil {
			return nil, fmt.Errorf("podSelector: %w", err)
		}
		podSel = s
	}
	if p.NamespaceSelector != nil {
		s, err := metav1.LabelSelectorAsSelector(p.NamespaceSelector)
		if err != nil {
			return nil, fmt.Errorf("namespaceSelector: %w", err)
		}
		nsSel = s
	}

	// Decide which namespaces are in scope.
	var inScope []string
	if nsSel == nil {
		// Pod-only selector → just the policy's own namespace.
		inScope = []string{policyNS}
	} else {
		for name, ns := range nsByName {
			if nsSel.Matches(labels.Set(ns.Labels)) {
				inScope = append(inScope, name)
			}
		}
		// Go randomises map iteration order; without sorting, the CIDR
		// order (and therefore the emitted Rule) would differ between
		// otherwise identical translation passes.
		sort.Strings(inScope)
	}

	var out []*net.IPNet
	for _, ns := range inScope {
		for _, pp := range peerPodsByNS[ns] {
			if podSel != nil && !podSel.Matches(labels.Set(pp.Labels)) {
				continue
			}
			for _, ip := range pp.IPs {
				out = append(out, ipToHostCIDR(ip))
			}
		}
	}
	return out, nil
}

// translatePorts converts NetworkPolicyPort entries into PortMatch.
//
// A nil/empty Ports list on a NetworkPolicy rule means "all ports" by
// spec; we represent that as a single zero-valued PortMatch (any proto,
// any port) so the renderer can emit a single rule rather than a chain
// of port-equality matches.
//
// For a non-empty list, each entry becomes one PortMatch:
//
//   - Protocol: TCP, UDP and SCTP map to "tcp", "udp" and "sctp"; an
//     omitted protocol defaults to TCP as the API defines. Anything else
//     is an error.
//   - Port: only numeric ports are supported and must lie in 1-65535. An
//     omitted port leaves Port at 0, meaning every port of the protocol.
//   - EndPort: requires Port, must be >= Port and <= 65535.
//
// The checks mirror API-server validation. They are repeated here because
// 0 doubles as the "any port" marker in PortMatch: an explicit port 0, or
// an endPort without a port, would otherwise silently widen the rule.
// Any error rejects the whole list, so the caller skips the rule (fail
// closed) instead of rendering a partial port filter.
func translatePorts(ports []netv1.NetworkPolicyPort) ([]PortMatch, error) {
	if len(ports) == 0 {
		return []PortMatch{{}}, nil
	}
	out := make([]PortMatch, 0, len(ports))
	for i, p := range ports {
		// Spec default when the protocol is omitted: TCP.
		protoStr := "tcp"
		if p.Protocol != nil {
			switch *p.Protocol {
			case "TCP":
				protoStr = "tcp"
			case "UDP":
				protoStr = "udp"
			case "SCTP":
				protoStr = "sctp"
			default:
				return nil, fmt.Errorf("port[%d]: protocol %q not supported", i, *p.Protocol)
			}
		}

		var port, endPort int
		if p.Port != nil {
			if p.Port.Type != 0 { // intstr.Int = 0; intstr.String = 1
				return nil, fmt.Errorf("port[%d]: named ports are not yet supported", i)
			}
			port = int(p.Port.IntVal)
			if port < 1 || port > 65535 {
				return nil, fmt.Errorf("port[%d]: port %d is outside 1-65535", i, port)
			}
		}
		if p.EndPort != nil {
			if p.Port == nil {
				return nil, fmt.Errorf("port[%d]: endPort requires port to be set", i)
			}
			endPort = int(*p.EndPort)
			if endPort < port {
				return nil, fmt.Errorf("port[%d]: endPort %d < port %d", i, endPort, port)
			}
			if endPort > 65535 {
				return nil, fmt.Errorf("port[%d]: endPort %d is outside 1-65535", i, endPort)
			}
		}
		out = append(out, PortMatch{Protocol: protoStr, Port: port, EndPort: endPort})
	}
	return out, nil
}

// assembleRules builds the Rule for one (pod, direction, peer-set, ports)
// combination. At most one Rule is produced: the peer-set is shared and
// the whole port list travels inline on that Rule.
//
//   - allowAll peers: one Rule with no PeerCIDRs / PeerExcept, which the
//     renderer treats as "any source".
//   - restricted peers that resolved to no CIDRs (for example a
//     podSelector no live pod matches): nothing is emitted, so the pod
//     stays in default-deny as far as this rule is concerned.
//   - otherwise: one Rule carrying copies of the peer CIDRs and excepts.
//
// Every slice placed on the Rule is a fresh copy of the input.
func assembleRules(pod Pod, dir Direction, peers peerSet, ports []PortMatch) []Rule {
	restricted := !peers.allowAll
	if restricted && len(peers.CIDRs) == 0 {
		return nil
	}

	var rule Rule
	rule.PodKey = pod.Namespace + "/" + pod.Name
	rule.HostIface = pod.HostIface
	rule.PodIPs = append([]net.IP(nil), pod.IPs...)
	rule.Direction = dir
	rule.Action = ActionAccept
	rule.Ports = append([]PortMatch(nil), ports...)

	if restricted {
		rule.PeerCIDRs = append([]*net.IPNet(nil), peers.CIDRs...)
		rule.PeerExcept = append([]*net.IPNet(nil), peers.Except...)
	}
	return []Rule{rule}
}

// canonicalisePolicies returns a copy of p ordered by namespace and then
// by name, leaving the caller's slice untouched. Working from this fixed
// order keeps the translator's output independent of the order in which
// policies were delivered.
func canonicalisePolicies(p []netv1.NetworkPolicy) []netv1.NetworkPolicy {
	sorted := append([]netv1.NetworkPolicy(nil), p...)
	byNamespaceThenName := func(a, b int) bool {
		left, right := &sorted[a], &sorted[b]
		if left.Namespace == right.Namespace {
			return left.Name < right.Name
		}
		return left.Namespace < right.Namespace
	}
	sort.Slice(sorted, byNamespaceThenName)
	return sorted
}

// indexNamespaces builds a lookup table from namespace name to Namespace.
// If several entries share a name, the last one in nss wins.
func indexNamespaces(nss []Namespace) map[string]Namespace {
	byName := make(map[string]Namespace, len(nss))
	for i := range nss {
		byName[nss[i].Name] = nss[i]
	}
	return byName
}

// indexPeerPods groups the cluster-wide peer pods by namespace.
//
// Within each namespace the pods are ordered by name so that the IP order
// seen by the translator is stable, and entries sharing a (namespace,
// name) are collapsed to the one that appeared first in pods. Callers may
// therefore list a pod more than once (for example once as a local pod
// and once as a peer) without its IPs being emitted twice.
//
// The input slice is not modified; the returned slices are freshly
// allocated.
func indexPeerPods(pods []PeerPod) map[string][]PeerPod {
	out := map[string][]PeerPod{}
	for _, p := range pods {
		out[p.Namespace] = append(out[p.Namespace], p)
	}
	for ns, list := range out {
		// Stable sort keeps duplicates in input order, so "first
		// occurrence wins" below is deterministic.
		sort.SliceStable(list, func(i, j int) bool { return list[i].Name < list[j].Name })

		// Collapse runs of equal names in place. The write index never
		// overtakes the read index, so no unread element is clobbered.
		uniq := list[:0]
		for i := range list {
			if len(uniq) > 0 && uniq[len(uniq)-1].Name == list[i].Name {
				continue
			}
			uniq = append(uniq, list[i])
		}
		// Replacing the value of an existing key during range is safe.
		out[ns] = uniq
	}
	return out
}

// ipToHostCIDR wraps a single address in the narrowest network that
// contains it: ip/32 when ip has an IPv4 form (including IPv4-mapped
// IPv6), ip/128 otherwise. IPv4 results carry the 4-byte form of the
// address; all others carry whatever ip.To16 yields.
func ipToHostCIDR(ip net.IP) *net.IPNet {
	addr, bits := ip.To16(), 128
	if short := ip.To4(); short != nil {
		addr, bits = short, 32
	}
	return &net.IPNet{IP: addr, Mask: net.CIDRMask(bits, bits)}
}