commit 01e4b58c915425dd1f7fc1b78c715bb56e159e32 Author: Donavan Fritz Date: Fri Apr 24 17:14:56 2026 -0500 Initial commit: dns-webhook MutatingAdmissionWebhook Rewrites dnsPolicy+dnsConfig on ClusterFirst pods to distribute queries across 3 randomly-selected auth-dns nameservers with edns0/rotate/ndots:5. Includes Gitea CI workflow and README. Co-Authored-By: Claude Sonnet 4.6 diff --git a/.gitea/workflows/main.yaml b/.gitea/workflows/main.yaml new file mode 100644 index 0000000..f2e6cbb --- /dev/null +++ b/.gitea/workflows/main.yaml @@ -0,0 +1,45 @@ +name: Build dns-webhook Image +on: + push: + branches: [main] +jobs: + build: + runs-on: fritzlab + steps: + - name: Check out repo + uses: actions/checkout@v4 + + - name: Log in to Gitea registry + uses: docker/login-action@v3 + with: + registry: code.fritzlab.net + username: ci-bot + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: code.fritzlab.net/dns/webhook + tags: | + type=raw,value=${{ github.run_number }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + provenance: false + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + network: host + + - name: Clean up old image tags + run: | + tea login add --name ci --url https://code.fritzlab.net --token '${{ secrets.CI_BOT_TOKEN }}' --no-version-check + tea api '/packages/dns?type=container' | jq -r '.[] | select(.name=="webhook") | select(.version | test("^[0-9]+$")) | .version' \ + | sort -n | head -n -3 \ + | while read tag; do + echo "deleting webhook:$tag" + tea api -X DELETE "/packages/dns/container/webhook/$tag" + done diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..100861f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM golang:1.26-alpine AS builder +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY *.go ./ +RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /dns-webhook . + +FROM gcr.io/distroless/static:nonroot +COPY --from=builder /dns-webhook /dns-webhook +ENTRYPOINT ["/dns-webhook"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..d31a22e --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# dns-webhook + +A Kubernetes [MutatingAdmissionWebhook](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) that rewrites DNS configuration on every new pod before it starts. + +## What it does + +When a pod is created with the default `dnsPolicy: ClusterFirst`, this webhook intercepts the request and: + +1. **Picks 3 random nameservers** from the pool of 4 production auth-dns pods (ns1–ns4). This distributes DNS query load instead of every pod always hitting the same two servers. +2. **Sets search domains** appropriate for the pod's namespace so short service names resolve correctly. +3. **Enables `edns0`** — allows DNS responses larger than 512 bytes (needed for DNSSEC and large TXT records). +4. **Enables `rotate`** — cycles through nameservers on each query for even load distribution. 
+ +Pods that opt out (`dnsPolicy: None`, `Default`, or `ClusterFirstWithHostNet`) are passed through unchanged. + +## Architecture + +``` +kubelet / kubectl apply + │ + ▼ +Kubernetes API server + │ (Pod CREATE request) + ▼ +MutatingAdmissionWebhook ──► dns-webhook pod (this service) + │ │ + │ ◄── JSON Patch ─────────┘ + │ replace dnsPolicy → None + │ add dnsConfig { nameservers, searches, options } + ▼ +Pod stored with rewritten DNS config +``` + +The webhook runs as a Deployment in `kube-system` and is registered via a `MutatingWebhookConfiguration`. cert-manager issues the TLS certificate; its cainjector populates the `caBundle` field automatically. + +## Logs + +Key log lines to watch for during debugging: + +| Prefix | Meaning | +|--------|---------| +| `dns-webhook starting: cert=... key=...` | Server startup — confirms TLS paths | +| `MUTATE pod=<namespace>/<name> uid=... nameservers=[...] op=add\|replace` | Pod was mutated — shows which nameservers were assigned | +| `SKIP pod=<namespace>/<name> uid=... policy=<policy>` | Pod was not mutated — shows why (non-ClusterFirst policy) | +| `ERROR ...` | Decode/encode failures — should never appear in normal operation | + +```bash +# Stream logs from all webhook replicas +kubectl --context sjc001 logs -n kube-system -l app=dns-webhook -f + +# Verify a running pod received the correct DNS config +kubectl --context sjc001 exec -n <namespace> <pod> -- cat /etc/resolv.conf +``` + +## Deployment + +Managed by ArgoCD. Manifests live in the `fritzlab/apps` repo under +`sjc001/kube-system/dns-webhook/manifests/`. 
+ +``` +apps/sjc001/kube-system/dns-webhook/ +├── app.yaml # ArgoCD Application +└── manifests/ + ├── deployment.yaml # Webhook pods (2 replicas, dnsPolicy: Default) + ├── issuer.yaml # cert-manager: selfSigned → CA → leaf cert + ├── service.yaml # ClusterIP Service on :443 → pod :8443 + ├── serviceaccount.yaml + └── webhook.yaml # MutatingWebhookConfiguration +``` + +The `deployment.yaml` image tag (`code.fritzlab.net/dns/webhook:<tag>`, where `<tag>` is the CI run number) must be updated whenever a new image is built. CI in this repo produces the image; update the tag in `apps` to deploy. + +## Development + +```bash +# Build locally +go build ./... + +# Run tests (none yet — the mutation logic is straightforward enough that +# end-to-end verification via a test pod is more useful) +go test ./... + +# Build container image +docker build -t dns-webhook:local . +``` + +## Design notes + +- **`dnsPolicy: Default` on the webhook pods themselves**: avoids a circular dependency — if cluster DNS is disrupted, the webhook pods can still start because they use the node's `/etc/resolv.conf` directly. +- **`failurePolicy: Ignore`**: if the webhook is unavailable, pods are admitted without mutation rather than being blocked. Availability of workloads takes priority over DNS load balancing. +- **`imagePullPolicy: IfNotPresent`**: if cluster DNS is down at pod start time, the image pull (which needs DNS to reach the registry) would fail. This policy uses the locally cached image instead. +- **ClusterIP service (not headless)**: webhook calls are short-lived HTTP requests — the keepalive starvation problem that affects long-lived connections doesn't apply here. A stable VIP is the conventional pattern for webhook services. +- **Static nameserver IPs**: the auth-dns pods use `cni.projectcalico.org/ipAddrs` to pin their Calico-allocated IPv6 addresses across restarts, making them safe to hardcode here. 
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..436c616 --- /dev/null +++ b/go.mod @@ -0,0 +1,26 @@ +module code.fritzlab.net/dns/webhook + +go 1.23.0 + +require k8s.io/api v0.32.3 + +require ( + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/text v0.19.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/apimachinery v0.32.3 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..28e6626 --- /dev/null +++ b/go.sum @@ -0,0 +1,95 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors 
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls= +k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k= +k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U= +k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= +sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= 
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/main.go b/main.go new file mode 100644 index 0000000..db38de8 --- /dev/null +++ b/main.go @@ -0,0 +1,284 @@ +// dns-webhook is a Kubernetes MutatingAdmissionWebhook. +// +// What is a MutatingAdmissionWebhook? +// +// When Kubernetes is about to create a resource (a Pod, Deployment, etc.), +// it first sends the resource's definition to every registered webhook for +// that resource type. The webhook can inspect the definition and return a +// set of JSON Patch operations — instructions that say "change field X to +// value Y before saving". Kubernetes applies those patches, then stores +// the final object. +// +// What does this webhook do? +// +// Every new Pod that uses Kubernetes cluster DNS (dnsPolicy: ClusterFirst) +// gets its DNS configuration rewritten so that: +// +// 1. Three nameserver IPs are chosen at random from the four production +// auth-dns pods (ns1–ns4). This spreads DNS query load across the +// pool instead of every pod hitting the same two servers. +// +// 2. The resolv.conf options "edns0" and "rotate" are enabled. +// - edns0 allows DNS responses larger than 512 bytes (needed for +// DNSSEC and large TXT records). +// - rotate makes the resolver cycle through the nameserver list so +// queries are distributed rather than always going to the +// first server that responds. +// +// Pods that already have explicit DNS config (DNSNone), use the node's +// own resolver (DNSDefault), or run with host networking + cluster DNS +// (ClusterFirstWithHostNet) are left untouched. +// +// TLS requirement: +// +// Kubernetes requires admission webhooks to speak HTTPS. This server +// loads a TLS certificate and key from /tls/ (written by cert-manager). +// The paths can be overridden with TLS_CERT / TLS_KEY environment variables. 
+ +package main + +import ( + "encoding/json" + "fmt" + "io" + "log" + "math/rand" + "net/http" + "os" + + // admissionv1 contains the AdmissionReview, AdmissionRequest, and + // AdmissionResponse types that Kubernetes sends to / expects from the webhook. + admissionv1 "k8s.io/api/admission/v1" + + // corev1 contains Pod, PodSpec, PodDNSConfig, etc. + corev1 "k8s.io/api/core/v1" +) + +// nameserverPool lists the IPv6 addresses of the four production auth-dns pods +// (ns1–ns4). ns0 is the staging instance and is intentionally excluded. +// +// These are static pod IPs allocated by Calico IPAM. They are stable across +// pod restarts because each Deployment pins its IP with the annotation +// cni.projectcalico.org/ipAddrs. +var nameserverPool = [4]string{ + "2602:817:3000:c608::202", // ns1 + "2602:817:3008:c607::204", // ns2 + "2602:817:3000:c608::203", // ns3 + "2602:817:3008:c607::203", // ns4 +} + +// clusterDomain is the Kubernetes cluster domain configured in kubelet. +// Service FQDNs follow the pattern: ..svc. +const clusterDomain = "k8s.sjc001.fritzlab.net" + +// jsonPatch represents a single RFC 6902 JSON Patch operation. +// +// Kubernetes admission webhooks return patches in this format. The three +// fields mean: +// - Op: the operation — "add", "replace", or "remove" +// - Path: a slash-separated path into the JSON document, e.g. /spec/dnsPolicy +// - Value: the new value to set (omitted for "remove") +type jsonPatch struct { + Op string `json:"op"` + Path string `json:"path"` + Value interface{} `json:"value,omitempty"` +} + +// pickThree returns three randomly selected nameserver IPs from nameserverPool. +// +// rand.Perm(4) returns a random permutation of [0, 1, 2, 3], so we take the +// first three indices to get three distinct servers. Using a permutation +// (rather than three independent random picks) guarantees no duplicates. 
+func pickThree() []string { + idx := rand.Perm(4) + return []string{nameserverPool[idx[0]], nameserverPool[idx[1]], nameserverPool[idx[2]]} +} + +// strPtr is a small helper that returns a pointer to a string. +// +// The PodDNSConfigOption type uses *string for option values so that an +// absent value (nil) is distinguishable from an empty string (""). +func strPtr(s string) *string { return &s } + +// handleMutate is the HTTP handler for the /mutate endpoint. +// +// Kubernetes calls this endpoint with an AdmissionReview JSON body whenever a +// Pod CREATE request matches the MutatingWebhookConfiguration rules. The +// handler must: +// +// 1. Decode the AdmissionReview to get the Pod definition. +// 2. Decide whether to mutate the Pod. +// 3. Return an AdmissionReview response with Allowed: true and any patches. +func handleMutate(w http.ResponseWriter, r *http.Request) { + // Read the full request body. + body, err := io.ReadAll(r.Body) + if err != nil { + log.Printf("ERROR reading request body: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Unmarshal the outer AdmissionReview envelope. + // AdmissionReview is the top-level wrapper Kubernetes uses for both the + // incoming request and the outgoing response. + var review admissionv1.AdmissionReview + if err := json.Unmarshal(body, &review); err != nil { + log.Printf("ERROR unmarshalling AdmissionReview: %v", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Extract the Pod from review.Request.Object.Raw. + // Raw is the verbatim JSON of the resource being admitted. + req := review.Request + var pod corev1.Pod + if err := json.Unmarshal(req.Object.Raw, &pod); err != nil { + log.Printf("ERROR unmarshalling Pod (uid=%s): %v", req.UID, err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Only mutate pods that use ClusterFirst DNS policy. 
+ // + // DNSPolicy determines how a pod resolves names: + // ClusterFirst — use cluster DNS (CoreDNS); fall back to upstream. + // This is the default for most workloads. + // DNSNone — pod supplies its own dnsConfig; nothing to do. + // DNSDefault — inherit the node's /etc/resolv.conf directly. + // Used by auth-dns pods themselves to avoid a + // circular dependency (DNS pod needs DNS to start). + // ClusterFirstWithHostNet — host-network pod that still wants cluster DNS; + // leave as-is to avoid breaking host-network semantics. + if pod.Spec.DNSPolicy != corev1.DNSClusterFirst { + log.Printf("SKIP pod=%s/%s uid=%s policy=%s (not ClusterFirst)", + req.Namespace, pod.Name, req.UID, pod.Spec.DNSPolicy) + respond(w, review, nil) // no patches, just allow + return + } + + // Determine the pod's namespace for the search domain list. + // Kubernetes guarantees this is set for namespaced resources, but fall back + // to "default" just in case. + ns := req.Namespace + if ns == "" { + ns = "default" + } + + // Build the DNS configuration we want every pod to have. + // + // Nameservers: three randomly chosen auth-dns pod IPs. + // + // Searches: resolv.conf search domains let short names be resolved without + // a fully-qualified domain name. For a pod in namespace "myapp": + // - myapp.svc.k8s.sjc001.fritzlab.net → finds Services in myapp + // - svc.k8s.sjc001.fritzlab.net → finds Services in any namespace + // - k8s.sjc001.fritzlab.net → catches cluster-level names + // So "kubectl exec mypod -- curl myservice" resolves without a FQDN. + // + // Options: + // ndots:5 — names with fewer than 5 dots are tried with search domains + // before being treated as absolute. This is the Kubernetes + // default and ensures short service names resolve correctly. + // edns0 — enables EDNS extension headers, allowing DNS responses up to + // 65535 bytes (default UDP cap is 512 bytes). 
+ // rotate — cycle through the nameserver list on each query so load is + // distributed rather than always hitting the first entry. + nameservers := pickThree() + dnsConfig := corev1.PodDNSConfig{ + Nameservers: nameservers, + Searches: []string{ + fmt.Sprintf("%s.svc.%s", ns, clusterDomain), + fmt.Sprintf("svc.%s", clusterDomain), + clusterDomain, + }, + Options: []corev1.PodDNSConfigOption{ + {Name: "edns0"}, + {Name: "rotate"}, + {Name: "ndots", Value: strPtr("5")}, + }, + } + + // JSON Patch requires "add" if the field doesn't exist yet in the manifest, + // or "replace" if it does. Sending "replace" for a non-existent field + // (or "add" for an existing one) is a patch error that would reject the pod. + dnsConfigOp := "add" + if pod.Spec.DNSConfig != nil { + dnsConfigOp = "replace" + } + + log.Printf("MUTATE pod=%s/%s uid=%s nameservers=%v op=%s", + ns, pod.Name, req.UID, nameservers, dnsConfigOp) + + // Build the two patch operations: + // 1. Change dnsPolicy from ClusterFirst → None. + // DNSNone is required when you provide a custom dnsConfig; it tells + // kubelet to use exactly the config we supply and nothing else. + // 2. Set dnsConfig to our constructed value. + patches := []jsonPatch{ + {Op: "replace", Path: "/spec/dnsPolicy", Value: corev1.DNSNone}, + {Op: dnsConfigOp, Path: "/spec/dnsConfig", Value: dnsConfig}, + } + + respond(w, review, patches) +} + +// respond writes an AdmissionReview response back to the Kubernetes API server. 
+// +// Every call to the /mutate endpoint must return an AdmissionReview with: +// - UID matching the incoming request (so Kubernetes can correlate them) +// - Allowed: true (we never block pods, just modify them) +// - Patch / PatchType set when we have mutations to apply +func respond(w http.ResponseWriter, review admissionv1.AdmissionReview, patches []jsonPatch) { + resp := &admissionv1.AdmissionResponse{ + UID: review.Request.UID, + Allowed: true, + } + + if patches != nil { + // Serialize the patch list to JSON and set the patch type. + // JSONPatch is the only patch type supported by admission webhooks. + b, _ := json.Marshal(patches) + pt := admissionv1.PatchTypeJSONPatch + resp.Patch = b + resp.PatchType = &pt + } + + // Place the response inside the same AdmissionReview envelope that came in. + // Kubernetes reads review.Response; review.Request is ignored on the way out. + review.Response = resp + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(review); err != nil { + log.Printf("ERROR encoding AdmissionReview response (uid=%s): %v", resp.UID, err) + } +} + +func main() { + // TLS cert and key are written by cert-manager into a Kubernetes Secret, + // which is mounted into the pod at /tls/. The environment variables allow + // the paths to be overridden in development or testing. + certFile := getenv("TLS_CERT", "/tls/tls.crt") + keyFile := getenv("TLS_KEY", "/tls/tls.key") + + // /mutate receives AdmissionReview requests from the Kubernetes API server. + http.HandleFunc("/mutate", handleMutate) + + // /healthz is a simple liveness/readiness probe endpoint. + // kubelet calls this to decide whether the pod is healthy. 
+ http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + log.Printf("dns-webhook starting: cert=%s key=%s", certFile, keyFile) + + log.Println("dns-webhook listening on :8443") + // Kubernetes requires admission webhooks to use TLS — plain HTTP is rejected. + log.Fatal(http.ListenAndServeTLS(":8443", certFile, keyFile, nil)) +} + +// getenv returns the value of the environment variable key, or def if unset. +func getenv(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +}