package netpol import ( "fmt" "net" "sort" netv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) // Inputs is the world-view the translator consumes. All fields are owned // by the caller; the translator does not mutate them. type Inputs struct { // LocalPods are the pods scheduled on this node that have a committed // flock allocation. Only these pods get rules — peers may live // elsewhere. LocalPods []Pod // PeerPods is the cluster-wide pod set used to resolve podSelector + // namespaceSelector peers. It is fine to include the local pods here // too; duplicates are deduped by (namespace, name). PeerPods []PeerPod // Namespaces is the cluster's full Namespace set. Used for // namespaceSelector matching. Namespaces []Namespace // Policies is every NetworkPolicy in the cluster. The translator // filters down to those that select at least one local pod. Policies []netv1.NetworkPolicy } // Output is the result of one translation pass. type Output struct { // Rules is the flat ordered list of allow rules to render. The // renderer groups them by (PodKey, Direction) into chains. Rules []Rule // Isolated is the set of (PodKey, Direction) pairs whose chain must // have a default-deny policy. A pod selected by at least one policy // in a given direction shows up here. The renderer uses this to // decide whether to emit a chain at all and what its base policy is. Isolated map[Isolation]struct{} // Pods carries the HostIface + IPs for every local pod referenced // by the policy world, including pods that produced only isolation // (default-deny) without any allow rules. The renderer needs this // because such a pod has no Rule to lift the HostIface from. Pods map[string]LocalPod // key = namespace/name } // Isolation is the (PodKey, Direction) key of the Isolated map. type Isolation struct { PodKey string Direction Direction } // Translate runs the translation pass. It is a pure function: same Inputs // always produces semantically equal Output. (Order of slices is stable // but Rules within a chain follow the order in which selecting policies // appear, which is itself sorted; see canonicalisePolicies.) // // Errors are returned only for unrecoverable malformed input; per-rule // translation errors are logged via warn and skipped so that a single // broken policy can't take down enforcement for a whole node. The optional // warn callback is invoked for each skipped sub-rule with a human-readable // message. Pass nil to silently drop. func Translate(in Inputs, warn func(string)) (Output, error) { if warn == nil { warn = func(string) {} } out := Output{ Isolated: map[Isolation]struct{}{}, Pods: map[string]LocalPod{}, } policies := canonicalisePolicies(in.Policies) nsByName := indexNamespaces(in.Namespaces) peerPodsByNS := indexPeerPods(in.PeerPods) for _, pod := range in.LocalPods { if len(pod.IPs) == 0 { continue // no allocation yet; translator skips } key := pod.Namespace + "/" + pod.Name // Find every policy in pod.Namespace whose podSelector matches. // Cross-namespace policies do not select pods outside their own // namespace; that's how the NetworkPolicy spec defines it. for _, p := range policies { if p.Namespace != pod.Namespace { continue } sel, err := metav1.LabelSelectorAsSelector(&p.Spec.PodSelector) if err != nil { warn(fmt.Sprintf("policy %s/%s: invalid podSelector: %v", p.Namespace, p.Name, err)) continue } if !sel.Matches(labels.Set(pod.Labels)) { continue } ingress, egress := policyDirections(&p) if ingress || egress { out.Pods[key] = LocalPod{ PodKey: key, HostIface: pod.HostIface, IPs: append([]net.IP(nil), pod.IPs...), } } if ingress { out.Isolated[Isolation{PodKey: key, Direction: DirIngress}] = struct{}{} } if egress { out.Isolated[Isolation{PodKey: key, Direction: DirEgress}] = struct{}{} } // Translate ingress rules. if ingress { for ri, r := range p.Spec.Ingress { rules, err := buildIngressRules(pod, r, p.Namespace, nsByName, peerPodsByNS) if err != nil { warn(fmt.Sprintf("policy %s/%s ingress[%d]: %v", p.Namespace, p.Name, ri, err)) continue } out.Rules = append(out.Rules, rules...) } } // Translate egress rules. if egress { for ri, r := range p.Spec.Egress { rules, err := buildEgressRules(pod, r, p.Namespace, nsByName, peerPodsByNS) if err != nil { warn(fmt.Sprintf("policy %s/%s egress[%d]: %v", p.Namespace, p.Name, ri, err)) continue } out.Rules = append(out.Rules, rules...) } } } } return out, nil } // policyDirections reports which directions a NetworkPolicy isolates. // // Per the spec, the PolicyTypes field is the source of truth when set; // when omitted, isolation is inferred from which rule lists are populated // (Ingress always; Egress only if Spec.Egress is non-empty). func policyDirections(p *netv1.NetworkPolicy) (ingress, egress bool) { if len(p.Spec.PolicyTypes) > 0 { for _, t := range p.Spec.PolicyTypes { switch t { case netv1.PolicyTypeIngress: ingress = true case netv1.PolicyTypeEgress: egress = true } } return } ingress = true egress = len(p.Spec.Egress) > 0 return } // buildIngressRules expands one NetworkPolicyIngressRule into Rule(s). // One Rule per allowed peer-set; each Rule carries the full Ports filter // from the source rule. func buildIngressRules( pod Pod, r netv1.NetworkPolicyIngressRule, policyNS string, nsByName map[string]Namespace, peerPodsByNS map[string][]PeerPod, ) ([]Rule, error) { ports, err := translatePorts(r.Ports) if err != nil { return nil, err } peers, err := translatePeers(r.From, policyNS, nsByName, peerPodsByNS) if err != nil { return nil, err } return assembleRules(pod, DirIngress, peers, ports), nil } // buildEgressRules is the egress mirror of buildIngressRules. func buildEgressRules( pod Pod, r netv1.NetworkPolicyEgressRule, policyNS string, nsByName map[string]Namespace, peerPodsByNS map[string][]PeerPod, ) ([]Rule, error) { ports, err := translatePorts(r.Ports) if err != nil { return nil, err } peers, err := translatePeers(r.To, policyNS, nsByName, peerPodsByNS) if err != nil { return nil, err } return assembleRules(pod, DirEgress, peers, ports), nil } // peerSet is the resolved peer information for one rule's From / To list. type peerSet struct { // allowAll is true when the rule has no peers at all (an empty From / // To list, which the spec defines as "from anywhere"). It overrides // CIDRs and Except. allowAll bool // CIDRs is the union of every IP / CIDR contributed by the rule's // peer entries (resolved Pod IPs, namespace pods, and ipBlock.cidr). CIDRs []*net.IPNet // Except is the union of every ipBlock.except entry across the rule. Except []*net.IPNet } // translatePeers resolves a list of NetworkPolicyPeer entries into a // peerSet. Each peer entry contributes either CIDRs (resolved from // pod / namespace selectors, or copied from ipBlock) or Except entries. func translatePeers( peers []netv1.NetworkPolicyPeer, policyNS string, nsByName map[string]Namespace, peerPodsByNS map[string][]PeerPod, ) (peerSet, error) { if len(peers) == 0 { return peerSet{allowAll: true}, nil } out := peerSet{} for i, p := range peers { switch { case p.IPBlock != nil: _, cidr, err := net.ParseCIDR(p.IPBlock.CIDR) if err != nil { return peerSet{}, fmt.Errorf("peer[%d] ipBlock.cidr %q: %w", i, p.IPBlock.CIDR, err) } out.CIDRs = append(out.CIDRs, cidr) for j, ex := range p.IPBlock.Except { _, exNet, err := net.ParseCIDR(ex) if err != nil { return peerSet{}, fmt.Errorf("peer[%d] ipBlock.except[%d] %q: %w", i, j, ex, err) } out.Except = append(out.Except, exNet) } case p.PodSelector != nil || p.NamespaceSelector != nil: ips, err := resolvePodNamespacePeer(p, policyNS, nsByName, peerPodsByNS) if err != nil { return peerSet{}, fmt.Errorf("peer[%d]: %w", i, err) } out.CIDRs = append(out.CIDRs, ips...) default: return peerSet{}, fmt.Errorf("peer[%d] is empty (must set ipBlock, podSelector, or namespaceSelector)", i) } } return out, nil } // resolvePodNamespacePeer walks the cluster's peer-pod set and returns // /128 (v6) and /32 (v4) CIDRs for each pod that matches the (possibly // combined) pod + namespace selectors. // // Selector semantics from the NetworkPolicy spec: // // - podSelector + namespaceSelector both nil → handled upstream. // - podSelector set, namespaceSelector nil → match in the policy's // own namespace. // - podSelector nil, namespaceSelector set → match every pod in // namespaces that match the namespaceSelector. // - both set → AND: pod must be in a matching namespace AND match // the podSelector. // // An empty (non-nil) selector matches everything in scope. func resolvePodNamespacePeer( p netv1.NetworkPolicyPeer, policyNS string, nsByName map[string]Namespace, peerPodsByNS map[string][]PeerPod, ) ([]*net.IPNet, error) { var podSel, nsSel labels.Selector if p.PodSelector != nil { s, err := metav1.LabelSelectorAsSelector(p.PodSelector) if err != nil { return nil, fmt.Errorf("podSelector: %w", err) } podSel = s } if p.NamespaceSelector != nil { s, err := metav1.LabelSelectorAsSelector(p.NamespaceSelector) if err != nil { return nil, fmt.Errorf("namespaceSelector: %w", err) } nsSel = s } // Decide which namespaces are in scope. var inScope []string if nsSel == nil { // Pod-only selector → just the policy's own namespace. inScope = []string{policyNS} } else { for name, ns := range nsByName { if nsSel.Matches(labels.Set(ns.Labels)) { inScope = append(inScope, name) } } } var out []*net.IPNet for _, ns := range inScope { for _, pp := range peerPodsByNS[ns] { if podSel != nil && !podSel.Matches(labels.Set(pp.Labels)) { continue } for _, ip := range pp.IPs { out = append(out, ipToHostCIDR(ip)) } } } return out, nil } // translatePorts converts NetworkPolicyPort entries into PortMatch. // // A nil/empty Ports list on a NetworkPolicy rule means "all ports" by // spec; we represent that as a single zero-valued PortMatch (any proto, // any port) so the renderer can emit a single rule rather than a chain // of port-equality matches. func translatePorts(ports []netv1.NetworkPolicyPort) ([]PortMatch, error) { if len(ports) == 0 { return []PortMatch{{}}, nil } var out []PortMatch for i, p := range ports { var protoStr string if p.Protocol != nil { switch *p.Protocol { case "TCP": protoStr = "tcp" case "UDP": protoStr = "udp" case "SCTP": protoStr = "sctp" default: return nil, fmt.Errorf("port[%d]: protocol %q not supported", i, *p.Protocol) } } else { // Spec default: TCP. We use empty string to mean "any of // the three" only when the user explicitly sets neither // protocol nor port; here the user has supplied a Port, // which implies a protocol — and the spec default is TCP. protoStr = "tcp" } var port, endPort int if p.Port != nil { if p.Port.Type != 0 { // intstr.Int = 0; intstr.String = 1 return nil, fmt.Errorf("port[%d]: named ports are not yet supported", i) } port = int(p.Port.IntVal) } if p.EndPort != nil { endPort = int(*p.EndPort) if endPort < port { return nil, fmt.Errorf("port[%d]: endPort %d < port %d", i, endPort, port) } } out = append(out, PortMatch{Protocol: protoStr, Port: port, EndPort: endPort}) } return out, nil } // assembleRules emits the cross-product of (one peer-set) × (port list). // We currently emit a single Rule per direction since the peer-set is the // expensive shared field; ports go inline. allowAll peers result in a // rule with no PeerCIDRs, which the renderer treats as "any source". func assembleRules(pod Pod, dir Direction, peers peerSet, ports []PortMatch) []Rule { if !peers.allowAll && len(peers.CIDRs) == 0 { // Selector matched no peers (e.g. podSelector for a label that // no live pod has). Emit nothing — the rule cannot allow any // real traffic. The pod stays in default-deny for this rule. return nil } r := Rule{ PodKey: pod.Namespace + "/" + pod.Name, HostIface: pod.HostIface, PodIPs: append([]net.IP(nil), pod.IPs...), Direction: dir, Action: ActionAccept, Ports: append([]PortMatch(nil), ports...), } if !peers.allowAll { r.PeerCIDRs = append([]*net.IPNet(nil), peers.CIDRs...) r.PeerExcept = append([]*net.IPNet(nil), peers.Except...) } return []Rule{r} } // canonicalisePolicies sorts the policy slice by (namespace, name) so the // translator's output is deterministic regardless of informer event order. func canonicalisePolicies(p []netv1.NetworkPolicy) []netv1.NetworkPolicy { out := append([]netv1.NetworkPolicy(nil), p...) sort.Slice(out, func(i, j int) bool { if out[i].Namespace != out[j].Namespace { return out[i].Namespace < out[j].Namespace } return out[i].Name < out[j].Name }) return out } func indexNamespaces(nss []Namespace) map[string]Namespace { out := make(map[string]Namespace, len(nss)) for _, ns := range nss { out[ns.Name] = ns } return out } func indexPeerPods(pods []PeerPod) map[string][]PeerPod { out := map[string][]PeerPod{} for _, p := range pods { out[p.Namespace] = append(out[p.Namespace], p) } // Sort each namespace's pod list by (name) so the translator's IP // ordering is stable. for k := range out { sort.Slice(out[k], func(i, j int) bool { return out[k][i].Name < out[k][j].Name }) } return out } // ipToHostCIDR returns ip/32 (v4) or ip/128 (v6) — the smallest CIDR // covering exactly that one address. func ipToHostCIDR(ip net.IP) *net.IPNet { if v4 := ip.To4(); v4 != nil { return &net.IPNet{IP: v4, Mask: net.CIDRMask(32, 32)} } return &net.IPNet{IP: ip.To16(), Mask: net.CIDRMask(128, 128)} }