444 lines
14 KiB
Go
444 lines
14 KiB
Go
|
|
package netpol
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"fmt"
|
|||
|
|
"net"
|
|||
|
|
"sort"
|
|||
|
|
|
|||
|
|
netv1 "k8s.io/api/networking/v1"
|
|||
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|||
|
|
"k8s.io/apimachinery/pkg/labels"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// Inputs is the world-view the translator consumes. All fields are owned
|
|||
|
|
// by the caller; the translator does not mutate them.
|
|||
|
|
type Inputs struct {
|
|||
|
|
// LocalPods are the pods scheduled on this node that have a committed
|
|||
|
|
// flock allocation. Only these pods get rules — peers may live
|
|||
|
|
// elsewhere.
|
|||
|
|
LocalPods []Pod
|
|||
|
|
|
|||
|
|
// PeerPods is the cluster-wide pod set used to resolve podSelector +
|
|||
|
|
// namespaceSelector peers. It is fine to include the local pods here
|
|||
|
|
// too; duplicates are deduped by (namespace, name).
|
|||
|
|
PeerPods []PeerPod
|
|||
|
|
|
|||
|
|
// Namespaces is the cluster's full Namespace set. Used for
|
|||
|
|
// namespaceSelector matching.
|
|||
|
|
Namespaces []Namespace
|
|||
|
|
|
|||
|
|
// Policies is every NetworkPolicy in the cluster. The translator
|
|||
|
|
// filters down to those that select at least one local pod.
|
|||
|
|
Policies []netv1.NetworkPolicy
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Output is the result of one translation pass.
|
|||
|
|
type Output struct {
|
|||
|
|
// Rules is the flat ordered list of allow rules to render. The
|
|||
|
|
// renderer groups them by (PodKey, Direction) into chains.
|
|||
|
|
Rules []Rule
|
|||
|
|
|
|||
|
|
// Isolated is the set of (PodKey, Direction) pairs whose chain must
|
|||
|
|
// have a default-deny policy. A pod selected by at least one policy
|
|||
|
|
// in a given direction shows up here. The renderer uses this to
|
|||
|
|
// decide whether to emit a chain at all and what its base policy is.
|
|||
|
|
Isolated map[Isolation]struct{}
|
|||
|
|
|
|||
|
|
// Pods carries the HostIface + IPs for every local pod referenced
|
|||
|
|
// by the policy world, including pods that produced only isolation
|
|||
|
|
// (default-deny) without any allow rules. The renderer needs this
|
|||
|
|
// because such a pod has no Rule to lift the HostIface from.
|
|||
|
|
Pods map[string]LocalPod // key = namespace/name
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Isolation is the (PodKey, Direction) key of the Isolated map.
|
|||
|
|
type Isolation struct {
|
|||
|
|
PodKey string
|
|||
|
|
Direction Direction
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Translate runs the translation pass. It is a pure function: same Inputs
|
|||
|
|
// always produces semantically equal Output. (Order of slices is stable
|
|||
|
|
// but Rules within a chain follow the order in which selecting policies
|
|||
|
|
// appear, which is itself sorted; see canonicalisePolicies.)
|
|||
|
|
//
|
|||
|
|
// Errors are returned only for unrecoverable malformed input; per-rule
|
|||
|
|
// translation errors are logged via warn and skipped so that a single
|
|||
|
|
// broken policy can't take down enforcement for a whole node. The optional
|
|||
|
|
// warn callback is invoked for each skipped sub-rule with a human-readable
|
|||
|
|
// message. Pass nil to silently drop.
|
|||
|
|
func Translate(in Inputs, warn func(string)) (Output, error) {
|
|||
|
|
if warn == nil {
|
|||
|
|
warn = func(string) {}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
out := Output{
|
|||
|
|
Isolated: map[Isolation]struct{}{},
|
|||
|
|
Pods: map[string]LocalPod{},
|
|||
|
|
}
|
|||
|
|
policies := canonicalisePolicies(in.Policies)
|
|||
|
|
nsByName := indexNamespaces(in.Namespaces)
|
|||
|
|
peerPodsByNS := indexPeerPods(in.PeerPods)
|
|||
|
|
|
|||
|
|
for _, pod := range in.LocalPods {
|
|||
|
|
if len(pod.IPs) == 0 {
|
|||
|
|
continue // no allocation yet; translator skips
|
|||
|
|
}
|
|||
|
|
key := pod.Namespace + "/" + pod.Name
|
|||
|
|
|
|||
|
|
// Find every policy in pod.Namespace whose podSelector matches.
|
|||
|
|
// Cross-namespace policies do not select pods outside their own
|
|||
|
|
// namespace; that's how the NetworkPolicy spec defines it.
|
|||
|
|
for _, p := range policies {
|
|||
|
|
if p.Namespace != pod.Namespace {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
sel, err := metav1.LabelSelectorAsSelector(&p.Spec.PodSelector)
|
|||
|
|
if err != nil {
|
|||
|
|
warn(fmt.Sprintf("policy %s/%s: invalid podSelector: %v", p.Namespace, p.Name, err))
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
if !sel.Matches(labels.Set(pod.Labels)) {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
ingress, egress := policyDirections(&p)
|
|||
|
|
if ingress || egress {
|
|||
|
|
out.Pods[key] = LocalPod{
|
|||
|
|
PodKey: key,
|
|||
|
|
HostIface: pod.HostIface,
|
|||
|
|
IPs: append([]net.IP(nil), pod.IPs...),
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if ingress {
|
|||
|
|
out.Isolated[Isolation{PodKey: key, Direction: DirIngress}] = struct{}{}
|
|||
|
|
}
|
|||
|
|
if egress {
|
|||
|
|
out.Isolated[Isolation{PodKey: key, Direction: DirEgress}] = struct{}{}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Translate ingress rules.
|
|||
|
|
if ingress {
|
|||
|
|
for ri, r := range p.Spec.Ingress {
|
|||
|
|
rules, err := buildIngressRules(pod, r, p.Namespace, nsByName, peerPodsByNS)
|
|||
|
|
if err != nil {
|
|||
|
|
warn(fmt.Sprintf("policy %s/%s ingress[%d]: %v", p.Namespace, p.Name, ri, err))
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
out.Rules = append(out.Rules, rules...)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
// Translate egress rules.
|
|||
|
|
if egress {
|
|||
|
|
for ri, r := range p.Spec.Egress {
|
|||
|
|
rules, err := buildEgressRules(pod, r, p.Namespace, nsByName, peerPodsByNS)
|
|||
|
|
if err != nil {
|
|||
|
|
warn(fmt.Sprintf("policy %s/%s egress[%d]: %v", p.Namespace, p.Name, ri, err))
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
out.Rules = append(out.Rules, rules...)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// policyDirections reports which directions a NetworkPolicy isolates.
|
|||
|
|
//
|
|||
|
|
// Per the spec, the PolicyTypes field is the source of truth when set;
|
|||
|
|
// when omitted, isolation is inferred from which rule lists are populated
|
|||
|
|
// (Ingress always; Egress only if Spec.Egress is non-empty).
|
|||
|
|
func policyDirections(p *netv1.NetworkPolicy) (ingress, egress bool) {
|
|||
|
|
if len(p.Spec.PolicyTypes) > 0 {
|
|||
|
|
for _, t := range p.Spec.PolicyTypes {
|
|||
|
|
switch t {
|
|||
|
|
case netv1.PolicyTypeIngress:
|
|||
|
|
ingress = true
|
|||
|
|
case netv1.PolicyTypeEgress:
|
|||
|
|
egress = true
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
ingress = true
|
|||
|
|
egress = len(p.Spec.Egress) > 0
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// buildIngressRules expands one NetworkPolicyIngressRule into Rule(s).
|
|||
|
|
// One Rule per allowed peer-set; each Rule carries the full Ports filter
|
|||
|
|
// from the source rule.
|
|||
|
|
func buildIngressRules(
|
|||
|
|
pod Pod,
|
|||
|
|
r netv1.NetworkPolicyIngressRule,
|
|||
|
|
policyNS string,
|
|||
|
|
nsByName map[string]Namespace,
|
|||
|
|
peerPodsByNS map[string][]PeerPod,
|
|||
|
|
) ([]Rule, error) {
|
|||
|
|
ports, err := translatePorts(r.Ports)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
peers, err := translatePeers(r.From, policyNS, nsByName, peerPodsByNS)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
return assembleRules(pod, DirIngress, peers, ports), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// buildEgressRules is the egress mirror of buildIngressRules.
|
|||
|
|
func buildEgressRules(
|
|||
|
|
pod Pod,
|
|||
|
|
r netv1.NetworkPolicyEgressRule,
|
|||
|
|
policyNS string,
|
|||
|
|
nsByName map[string]Namespace,
|
|||
|
|
peerPodsByNS map[string][]PeerPod,
|
|||
|
|
) ([]Rule, error) {
|
|||
|
|
ports, err := translatePorts(r.Ports)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
peers, err := translatePeers(r.To, policyNS, nsByName, peerPodsByNS)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
return assembleRules(pod, DirEgress, peers, ports), nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// peerSet holds the resolved peers of one rule's From / To list.
type peerSet struct {
	// allowAll marks a rule whose peer list is empty, which the spec
	// reads as "any peer". When set, CIDRs and Except are ignored.
	allowAll bool

	// CIDRs collects every address range the rule's peer entries
	// contribute: host CIDRs of pods resolved through pod / namespace
	// selectors, plus each ipBlock.cidr.
	CIDRs []*net.IPNet

	// Except collects every ipBlock.except range found across the rule's
	// peer entries.
	Except []*net.IPNet
}
|
|||
|
|
|
|||
|
|
// translatePeers resolves a list of NetworkPolicyPeer entries into a
|
|||
|
|
// peerSet. Each peer entry contributes either CIDRs (resolved from
|
|||
|
|
// pod / namespace selectors, or copied from ipBlock) or Except entries.
|
|||
|
|
func translatePeers(
|
|||
|
|
peers []netv1.NetworkPolicyPeer,
|
|||
|
|
policyNS string,
|
|||
|
|
nsByName map[string]Namespace,
|
|||
|
|
peerPodsByNS map[string][]PeerPod,
|
|||
|
|
) (peerSet, error) {
|
|||
|
|
if len(peers) == 0 {
|
|||
|
|
return peerSet{allowAll: true}, nil
|
|||
|
|
}
|
|||
|
|
out := peerSet{}
|
|||
|
|
for i, p := range peers {
|
|||
|
|
switch {
|
|||
|
|
case p.IPBlock != nil:
|
|||
|
|
_, cidr, err := net.ParseCIDR(p.IPBlock.CIDR)
|
|||
|
|
if err != nil {
|
|||
|
|
return peerSet{}, fmt.Errorf("peer[%d] ipBlock.cidr %q: %w", i, p.IPBlock.CIDR, err)
|
|||
|
|
}
|
|||
|
|
out.CIDRs = append(out.CIDRs, cidr)
|
|||
|
|
for j, ex := range p.IPBlock.Except {
|
|||
|
|
_, exNet, err := net.ParseCIDR(ex)
|
|||
|
|
if err != nil {
|
|||
|
|
return peerSet{}, fmt.Errorf("peer[%d] ipBlock.except[%d] %q: %w", i, j, ex, err)
|
|||
|
|
}
|
|||
|
|
out.Except = append(out.Except, exNet)
|
|||
|
|
}
|
|||
|
|
case p.PodSelector != nil || p.NamespaceSelector != nil:
|
|||
|
|
ips, err := resolvePodNamespacePeer(p, policyNS, nsByName, peerPodsByNS)
|
|||
|
|
if err != nil {
|
|||
|
|
return peerSet{}, fmt.Errorf("peer[%d]: %w", i, err)
|
|||
|
|
}
|
|||
|
|
out.CIDRs = append(out.CIDRs, ips...)
|
|||
|
|
default:
|
|||
|
|
return peerSet{}, fmt.Errorf("peer[%d] is empty (must set ipBlock, podSelector, or namespaceSelector)", i)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// resolvePodNamespacePeer walks the cluster's peer-pod set and returns
|
|||
|
|
// /128 (v6) and /32 (v4) CIDRs for each pod that matches the (possibly
|
|||
|
|
// combined) pod + namespace selectors.
|
|||
|
|
//
|
|||
|
|
// Selector semantics from the NetworkPolicy spec:
|
|||
|
|
//
|
|||
|
|
// - podSelector + namespaceSelector both nil → handled upstream.
|
|||
|
|
// - podSelector set, namespaceSelector nil → match in the policy's
|
|||
|
|
// own namespace.
|
|||
|
|
// - podSelector nil, namespaceSelector set → match every pod in
|
|||
|
|
// namespaces that match the namespaceSelector.
|
|||
|
|
// - both set → AND: pod must be in a matching namespace AND match
|
|||
|
|
// the podSelector.
|
|||
|
|
//
|
|||
|
|
// An empty (non-nil) selector matches everything in scope.
|
|||
|
|
func resolvePodNamespacePeer(
|
|||
|
|
p netv1.NetworkPolicyPeer,
|
|||
|
|
policyNS string,
|
|||
|
|
nsByName map[string]Namespace,
|
|||
|
|
peerPodsByNS map[string][]PeerPod,
|
|||
|
|
) ([]*net.IPNet, error) {
|
|||
|
|
var podSel, nsSel labels.Selector
|
|||
|
|
if p.PodSelector != nil {
|
|||
|
|
s, err := metav1.LabelSelectorAsSelector(p.PodSelector)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, fmt.Errorf("podSelector: %w", err)
|
|||
|
|
}
|
|||
|
|
podSel = s
|
|||
|
|
}
|
|||
|
|
if p.NamespaceSelector != nil {
|
|||
|
|
s, err := metav1.LabelSelectorAsSelector(p.NamespaceSelector)
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, fmt.Errorf("namespaceSelector: %w", err)
|
|||
|
|
}
|
|||
|
|
nsSel = s
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Decide which namespaces are in scope.
|
|||
|
|
var inScope []string
|
|||
|
|
if nsSel == nil {
|
|||
|
|
// Pod-only selector → just the policy's own namespace.
|
|||
|
|
inScope = []string{policyNS}
|
|||
|
|
} else {
|
|||
|
|
for name, ns := range nsByName {
|
|||
|
|
if nsSel.Matches(labels.Set(ns.Labels)) {
|
|||
|
|
inScope = append(inScope, name)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var out []*net.IPNet
|
|||
|
|
for _, ns := range inScope {
|
|||
|
|
for _, pp := range peerPodsByNS[ns] {
|
|||
|
|
if podSel != nil && !podSel.Matches(labels.Set(pp.Labels)) {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
for _, ip := range pp.IPs {
|
|||
|
|
out = append(out, ipToHostCIDR(ip))
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// translatePorts converts NetworkPolicyPort entries into PortMatch.
|
|||
|
|
//
|
|||
|
|
// A nil/empty Ports list on a NetworkPolicy rule means "all ports" by
|
|||
|
|
// spec; we represent that as a single zero-valued PortMatch (any proto,
|
|||
|
|
// any port) so the renderer can emit a single rule rather than a chain
|
|||
|
|
// of port-equality matches.
|
|||
|
|
func translatePorts(ports []netv1.NetworkPolicyPort) ([]PortMatch, error) {
|
|||
|
|
if len(ports) == 0 {
|
|||
|
|
return []PortMatch{{}}, nil
|
|||
|
|
}
|
|||
|
|
var out []PortMatch
|
|||
|
|
for i, p := range ports {
|
|||
|
|
var protoStr string
|
|||
|
|
if p.Protocol != nil {
|
|||
|
|
switch *p.Protocol {
|
|||
|
|
case "TCP":
|
|||
|
|
protoStr = "tcp"
|
|||
|
|
case "UDP":
|
|||
|
|
protoStr = "udp"
|
|||
|
|
case "SCTP":
|
|||
|
|
protoStr = "sctp"
|
|||
|
|
default:
|
|||
|
|
return nil, fmt.Errorf("port[%d]: protocol %q not supported", i, *p.Protocol)
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
// Spec default: TCP. We use empty string to mean "any of
|
|||
|
|
// the three" only when the user explicitly sets neither
|
|||
|
|
// protocol nor port; here the user has supplied a Port,
|
|||
|
|
// which implies a protocol — and the spec default is TCP.
|
|||
|
|
protoStr = "tcp"
|
|||
|
|
}
|
|||
|
|
var port, endPort int
|
|||
|
|
if p.Port != nil {
|
|||
|
|
if p.Port.Type != 0 { // intstr.Int = 0; intstr.String = 1
|
|||
|
|
return nil, fmt.Errorf("port[%d]: named ports are not yet supported", i)
|
|||
|
|
}
|
|||
|
|
port = int(p.Port.IntVal)
|
|||
|
|
}
|
|||
|
|
if p.EndPort != nil {
|
|||
|
|
endPort = int(*p.EndPort)
|
|||
|
|
if endPort < port {
|
|||
|
|
return nil, fmt.Errorf("port[%d]: endPort %d < port %d", i, endPort, port)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
out = append(out, PortMatch{Protocol: protoStr, Port: port, EndPort: endPort})
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// assembleRules emits the cross-product of (one peer-set) × (port list).
|
|||
|
|
// We currently emit a single Rule per direction since the peer-set is the
|
|||
|
|
// expensive shared field; ports go inline. allowAll peers result in a
|
|||
|
|
// rule with no PeerCIDRs, which the renderer treats as "any source".
|
|||
|
|
func assembleRules(pod Pod, dir Direction, peers peerSet, ports []PortMatch) []Rule {
|
|||
|
|
if !peers.allowAll && len(peers.CIDRs) == 0 {
|
|||
|
|
// Selector matched no peers (e.g. podSelector for a label that
|
|||
|
|
// no live pod has). Emit nothing — the rule cannot allow any
|
|||
|
|
// real traffic. The pod stays in default-deny for this rule.
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
r := Rule{
|
|||
|
|
PodKey: pod.Namespace + "/" + pod.Name,
|
|||
|
|
HostIface: pod.HostIface,
|
|||
|
|
PodIPs: append([]net.IP(nil), pod.IPs...),
|
|||
|
|
Direction: dir,
|
|||
|
|
Action: ActionAccept,
|
|||
|
|
Ports: append([]PortMatch(nil), ports...),
|
|||
|
|
}
|
|||
|
|
if !peers.allowAll {
|
|||
|
|
r.PeerCIDRs = append([]*net.IPNet(nil), peers.CIDRs...)
|
|||
|
|
r.PeerExcept = append([]*net.IPNet(nil), peers.Except...)
|
|||
|
|
}
|
|||
|
|
return []Rule{r}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// canonicalisePolicies sorts the policy slice by (namespace, name) so the
|
|||
|
|
// translator's output is deterministic regardless of informer event order.
|
|||
|
|
func canonicalisePolicies(p []netv1.NetworkPolicy) []netv1.NetworkPolicy {
|
|||
|
|
out := append([]netv1.NetworkPolicy(nil), p...)
|
|||
|
|
sort.Slice(out, func(i, j int) bool {
|
|||
|
|
if out[i].Namespace != out[j].Namespace {
|
|||
|
|
return out[i].Namespace < out[j].Namespace
|
|||
|
|
}
|
|||
|
|
return out[i].Name < out[j].Name
|
|||
|
|
})
|
|||
|
|
return out
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func indexNamespaces(nss []Namespace) map[string]Namespace {
|
|||
|
|
out := make(map[string]Namespace, len(nss))
|
|||
|
|
for _, ns := range nss {
|
|||
|
|
out[ns.Name] = ns
|
|||
|
|
}
|
|||
|
|
return out
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func indexPeerPods(pods []PeerPod) map[string][]PeerPod {
|
|||
|
|
out := map[string][]PeerPod{}
|
|||
|
|
for _, p := range pods {
|
|||
|
|
out[p.Namespace] = append(out[p.Namespace], p)
|
|||
|
|
}
|
|||
|
|
// Sort each namespace's pod list by (name) so the translator's IP
|
|||
|
|
// ordering is stable.
|
|||
|
|
for k := range out {
|
|||
|
|
sort.Slice(out[k], func(i, j int) bool { return out[k][i].Name < out[k][j].Name })
|
|||
|
|
}
|
|||
|
|
return out
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ipToHostCIDR wraps a single address in the narrowest network that
// contains exactly that address: a /32 when ip has a 4-byte form
// (including v4-mapped v6 addresses), otherwise a /128 over its 16-byte
// form.
func ipToHostCIDR(ip net.IP) *net.IPNet {
	addr, bits := ip.To16(), 128
	if short := ip.To4(); short != nil {
		addr, bits = short, 32
	}
	return &net.IPNet{IP: addr, Mask: net.CIDRMask(bits, bits)}
}
|