From e9d3eef2cc202cfdd43a43ed34fd794a1d29b9d6 Mon Sep 17 00:00:00 2001 From: Donavan Fritz Date: Sat, 25 Apr 2026 22:22:39 -0500 Subject: [PATCH] netpol: accept established+related at top of every pod chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K8s NetworkPolicy applies to the start of new connections; reply packets for established flows (and ICMP related) must not be matched against the explicit allow set. The pod ingress chain previously had only explicit dport allows + a final drop, so any reply to a pod-initiated outbound where the reply's dport (the ephemeral source port) wasn't in the allow set got dropped. Hit in production 2026-04-26: garage's `garage-admin-restrict` NP allowed dports 3900/80/3901/3903 only. Garage uses kubernetes_discovery to find peers — outbound to kube-apiserver succeeded, replies returned to ephemeral source ports, dropped → "Layout not ready" cluster-wide. Fix: emit `ct state established,related accept` as the first rule in every pod_{podKey}_(ingress|egress) chain. Regression test added. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- pkg/agent/netpol/render.go | 6 ++++++ pkg/agent/netpol/render_test.go | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/pkg/agent/netpol/render.go b/pkg/agent/netpol/render.go index bfc76f6..1494c06 100644 --- a/pkg/agent/netpol/render.go +++ b/pkg/agent/netpol/render.go @@ -161,6 +161,12 @@ func chainName(podKey string, dir Direction) string { // the chain's drop policy IS the default-deny. func writeChain(sb *strings.Builder, c chain) { fmt.Fprintf(sb, "\tchain %s {\n", c.name) + // Stateful accept for return traffic. NetworkPolicy applies to the + // start of a new connection — reply packets for pod-initiated flows + // (egress) and follow-up packets of an established ingress flow must + // pass regardless of the explicit allow set, otherwise the chain's + // final drop kills ephemeral-port replies (e.g. pod → kube-apiserver). 
+ sb.WriteString("\t\tct state established,related accept\n") for _, r := range c.rules { writeAllowRule(sb, r) } diff --git a/pkg/agent/netpol/render_test.go b/pkg/agent/netpol/render_test.go index cdb233f..9cef529 100644 --- a/pkg/agent/netpol/render_test.go +++ b/pkg/agent/netpol/render_test.go @@ -39,6 +39,13 @@ func TestRender_DefaultDeny(t *testing.T) { if !strings.Contains(got, `oifname "flock00000001" jump pod_`) { t.Fatalf("missing veth-only ingress jump in base chain:\n%s", got) } + // Stateful accept must be present so reply traffic for pod-initiated + // outbound (e.g. ephemeral-port replies from kube-apiserver) is not + // dropped by the chain's final drop. Regression guard: production hit + // this when garage's k8s-discovery → apiserver replies got dropped. + if !strings.Contains(got, "ct state established,related accept") { + t.Fatalf("missing ct state established,related accept:\n%s", got) + } } // TestRender_DualStack — dual-stack pod gets one veth-anchored jump per