From 20f47916af40b955ebf19eb7ca41d94ad4872d03 Mon Sep 17 00:00:00 2001 From: Donavan Fritz Date: Fri, 24 Apr 2026 21:17:42 -0500 Subject: [PATCH] flock M1 scaffold: CNI plugin + agent + NodeConfig CRD - cmd/flock + cmd/flock-agent: build cleanly; CNI ADD/DEL/CHECK return ErrInternal stubs until M2; agent boots, opens unix socket, logs JSON. - pkg/agent/state.go: durable allocations.json (atomic write + fsync + parent fsync); pending/committed lifecycle. Tests cover round-trip, replace-by-cid, version mismatch, no-leak-on-tmp. - pkg/embed/suffix.go: ip-algo IID embedding. Tests cover the /48-/96 nibble distribution table from the design doc, determinism, prefix preservation, N-nibble isolation, digest-vs-fallback divergence. - pkg/api/v1alpha1: minimal NodeConfig types (no controller-runtime yet). - deploy/: NodeConfig CRD, empty ServiceAccount/ClusterRole, DaemonSet pinned to flock.fritzlab.net/agent="" label so it only runs on opted-in nodes. - .gitea/workflows/main.yaml + Dockerfile: build + push to code.fritzlab.net/fritzlab/flock; runs go test in CI. Design doc: dfritzlab/k8s-manager/dfritz-cni.md. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .dockerignore | 8 + .gitea/workflows/main.yaml | 60 ++++++ Dockerfile | 22 ++ LICENSE | 15 ++ README.md | 22 ++ cmd/flock-agent/main.go | 50 +++++ cmd/flock/main.go | 19 ++ .../crds/flock.fritzlab.net_nodeconfigs.yaml | 72 +++++++ deploy/daemonset.yaml | 84 ++++++++ deploy/install.yaml | 184 ++++++++++++++++ deploy/rbac/serviceaccount.yaml | 26 +++ go.mod | 10 + go.sum | 28 +++ pkg/agent/server.go | 100 +++++++++ pkg/agent/state.go | 204 ++++++++++++++++++ pkg/agent/state_test.go | 125 +++++++++++ pkg/api/v1alpha1/groupversion_info.go | 8 + pkg/api/v1alpha1/nodeconfig_types.go | 54 +++++ pkg/cni/plugin.go | 37 ++++ pkg/cni/rpc_client.go | 5 + pkg/embed/suffix.go | 174 +++++++++++++++ pkg/embed/suffix_test.go | 153 +++++++++++++ 22 files changed, 1460 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitea/workflows/main.yaml create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 cmd/flock-agent/main.go create mode 100644 cmd/flock/main.go create mode 100644 deploy/crds/flock.fritzlab.net_nodeconfigs.yaml create mode 100644 deploy/daemonset.yaml create mode 100644 deploy/install.yaml create mode 100644 deploy/rbac/serviceaccount.yaml create mode 100644 go.mod create mode 100644 go.sum create mode 100644 pkg/agent/server.go create mode 100644 pkg/agent/state.go create mode 100644 pkg/agent/state_test.go create mode 100644 pkg/api/v1alpha1/groupversion_info.go create mode 100644 pkg/api/v1alpha1/nodeconfig_types.go create mode 100644 pkg/cni/plugin.go create mode 100644 pkg/cni/rpc_client.go create mode 100644 pkg/embed/suffix.go create mode 100644 pkg/embed/suffix_test.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e906a95 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git +.gitea +deploy +hack +test +README.md +LICENSE +*.md diff --git a/.gitea/workflows/main.yaml b/.gitea/workflows/main.yaml new file mode 100644 index 0000000..8454c49 --- /dev/null +++ b/.gitea/workflows/main.yaml @@ -0,0 +1,60 @@ +name: Build flock Image +on: + push: + branches: [main] +jobs: + build: + runs-on: fritzlab + steps: + - name: Check out repo + uses: actions/checkout@v4 + + - name: Run unit tests + run: | + docker run --rm -v "$PWD:/src" -w /src golang:1.26-alpine \ + sh -c "go test ./..." + + - name: Log in to Gitea registry + uses: docker/login-action@v3 + with: + registry: code.fritzlab.net + username: ci-bot + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: code.fritzlab.net/fritzlab/flock + tags: | + type=raw,value=latest + type=raw,value=${{ github.run_number }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + push: true + provenance: false + build-args: | + GIT_SHA=${{ github.sha }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + network: host + + - name: Smoke-test image + run: | + docker run --rm code.fritzlab.net/fritzlab/flock:${{ github.run_number }} --help || true + docker run --rm --entrypoint /usr/local/bin/flock \ + code.fritzlab.net/fritzlab/flock:${{ github.run_number }} || true + + - name: Clean up old image tags + run: | + tea login add --name ci --url https://code.fritzlab.net --token '${{ secrets.CI_BOT_TOKEN }}' --no-version-check + tea api '/packages/fritzlab?type=container' \ + | jq -r '.[] | select(.name=="flock") | select(.version | test("^[0-9]+$")) | .version' \ + | sort -n | head -n -3 \ + | while read tag; do + echo "deleting flock:$tag" + tea api -X DELETE "/packages/fritzlab/container/flock/$tag" + done diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bd24ba2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM golang:1.26-alpine AS build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download + +COPY cmd/ ./cmd/ +COPY pkg/ ./pkg/ + +ARG GIT_SHA=unknown +RUN CGO_ENABLED=0 go build -trimpath \ + -ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \ + -o /out/flock ./cmd/flock \ + && CGO_ENABLED=0 go build -trimpath \ + -ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \ + -o /out/flock-agent ./cmd/flock-agent + +FROM alpine:3.21 +RUN apk add --no-cache iproute2 ca-certificates +COPY --from=build /out/flock /usr/local/bin/flock +COPY --from=build /out/flock-agent /usr/local/bin/flock-agent +ENTRYPOINT ["/usr/local/bin/flock-agent"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d07ae9a --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..078d2e0 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# flock + +Kubernetes CNI for sjc001. Per-pod IPv4 opt-in, IID embedding, Ready-gated anycast via BGP. + +Design doc: `k8s-manager/dfritz-cni.md` (in the operator's k8s-manager repo). + +Status: M1 scaffold. Not functional. See milestones table in the design doc. + +## Layout + +- `cmd/flock` — CNI plugin binary (kubelet-invoked) +- `cmd/flock-agent` — DaemonSet binary +- `pkg/api/v1alpha1` — `NodeConfig` CRD types +- `pkg/cni` — CNI plugin internals + RPC client +- `pkg/agent` — agent server, IPAM, state file, anycast, NetworkPolicy +- `pkg/embed` — `ip-algo` IID embedding (pure) +- `pkg/routing/{bird,ospf}` — routing backends +- `deploy/` — CRDs, RBAC, DaemonSet manifests + +## License + +Apache 2.0. diff --git a/cmd/flock-agent/main.go b/cmd/flock-agent/main.go new file mode 100644 index 0000000..83509c7 --- /dev/null +++ b/cmd/flock-agent/main.go @@ -0,0 +1,50 @@ +// Command flock-agent is the per-node DaemonSet binary. It owns IPAM, netns +// programming, BIRD config, and nftables. M1 boots only the state store and a +// placeholder unix listener. +package main + +import ( + "context" + "flag" + "log/slog" + "os" + "os/signal" + "syscall" + + "code.fritzlab.net/fritzlab/flock/pkg/agent" +) + +func main() { + var ( + node = flag.String("node", os.Getenv("NODE_NAME"), "node name (defaults to $NODE_NAME)") + statePath = flag.String("state", "/var/lib/flock/allocations.json", "path to allocations.json") + socket = flag.String("socket", agent.SocketPath, "unix socket for CNI RPC") + ) + flag.Parse() + + logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})) + + if *node == "" { + logger.Error("--node or $NODE_NAME is required") + os.Exit(2) + } + + srv, err := agent.NewServer(agent.Config{ + Node: *node, + StatePath: *statePath, + Socket: *socket, + Logger: logger, + }) + if err != nil { + logger.Error("init server", "err", err) + os.Exit(1) + } + + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer cancel() + + if err := srv.Run(ctx); err != nil { + logger.Error("run", "err", err) + os.Exit(1) + } +} diff --git a/cmd/flock/main.go b/cmd/flock/main.go new file mode 100644 index 0000000..77b6435 --- /dev/null +++ b/cmd/flock/main.go @@ -0,0 +1,19 @@ +// Command flock is the kubelet-invoked CNI plugin binary. It is short-lived: +// kubelet execs it for ADD/DEL/CHECK, the binary forwards to flock-agent over +// a unix socket, and exits. +package main + +import ( + "github.com/containernetworking/cni/pkg/skel" + "github.com/containernetworking/cni/pkg/version" + + "code.fritzlab.net/fritzlab/flock/pkg/cni" +) + +func main() { + skel.PluginMainFuncs(skel.CNIFuncs{ + Add: cni.CmdAdd, + Del: cni.CmdDel, + Check: cni.CmdCheck, + }, version.All, "flock") +} diff --git a/deploy/crds/flock.fritzlab.net_nodeconfigs.yaml b/deploy/crds/flock.fritzlab.net_nodeconfigs.yaml new file mode 100644 index 0000000..9da50a3 --- /dev/null +++ b/deploy/crds/flock.fritzlab.net_nodeconfigs.yaml @@ -0,0 +1,72 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: nodeconfigs.flock.fritzlab.net +spec: + group: flock.fritzlab.net + scope: Cluster + names: + kind: NodeConfig + listKind: NodeConfigList + singular: nodeconfig + plural: nodeconfigs + shortNames: + - fnc + versions: + - name: v1alpha1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + required: [spec] + properties: + spec: + type: object + required: [bgp] + properties: + cidr6: + type: array + items: + type: string + description: IPv6 CIDR owned and aggregate-advertised by this node. + cidr4: + type: array + items: + type: string + description: IPv4 CIDR owned and aggregate-advertised by this node. + bgp: + type: object + required: [asn, peers] + properties: + asn: + type: integer + format: int64 + minimum: 1 + maximum: 4294967295 + description: This node's local ASN. + peers: + type: array + minItems: 1 + items: + type: object + required: [address, asn] + properties: + address: + type: string + description: Peer IP (IPv6 or IPv4). + asn: + type: integer + format: int64 + minimum: 1 + maximum: 4294967295 + additionalPrinterColumns: + - name: ASN + type: integer + jsonPath: .spec.bgp.asn + - name: CIDR6 + type: string + jsonPath: .spec.cidr6 + - name: CIDR4 + type: string + jsonPath: .spec.cidr4 diff --git a/deploy/daemonset.yaml b/deploy/daemonset.yaml new file mode 100644 index 0000000..2a4331b --- /dev/null +++ b/deploy/daemonset.yaml @@ -0,0 +1,84 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: flock-agent + namespace: kube-system + labels: + app: flock-agent +spec: + selector: + matchLabels: + app: flock-agent + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + app: flock-agent + spec: + serviceAccountName: flock-agent + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + priorityClassName: system-node-critical + # M1: opt-in per-node via this label. Remove the nodeSelector when + # ready to roll flock to all nodes. + nodeSelector: + flock.fritzlab.net/agent: "" + # Tolerate the cni-test taint applied during migration. + tolerations: + - key: fritzlab.net/cni-test + operator: Equal + value: "true" + effect: NoSchedule + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node.kubernetes.io/not-ready + operator: Exists + effect: NoExecute + - key: node.kubernetes.io/unreachable + operator: Exists + effect: NoExecute + containers: + - name: flock-agent + image: code.fritzlab.net/fritzlab/flock:latest + imagePullPolicy: Always + args: + - --node=$(NODE_NAME) + - --state=/var/lib/flock/allocations.json + - --socket=/run/flock/flock.sock + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink. + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + volumeMounts: + - name: lib-flock + mountPath: /var/lib/flock + - name: run-flock + mountPath: /run/flock + resources: + requests: + cpu: 25m + memory: 32Mi + limits: + memory: 128Mi + volumes: + - name: lib-flock + hostPath: + path: /var/lib/flock + type: DirectoryOrCreate + - name: run-flock + hostPath: + path: /run/flock + type: DirectoryOrCreate + imagePullSecrets: + - name: code-fritzlab-net diff --git a/deploy/install.yaml b/deploy/install.yaml new file mode 100644 index 0000000..6ba3c6f --- /dev/null +++ b/deploy/install.yaml @@ -0,0 +1,184 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: nodeconfigs.flock.fritzlab.net +spec: + group: flock.fritzlab.net + scope: Cluster + names: + kind: NodeConfig + listKind: NodeConfigList + singular: nodeconfig + plural: nodeconfigs + shortNames: + - fnc + versions: + - name: v1alpha1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + required: [spec] + properties: + spec: + type: object + required: [bgp] + properties: + cidr6: + type: array + items: + type: string + description: IPv6 CIDR owned and aggregate-advertised by this node. + cidr4: + type: array + items: + type: string + description: IPv4 CIDR owned and aggregate-advertised by this node. + bgp: + type: object + required: [asn, peers] + properties: + asn: + type: integer + format: int64 + minimum: 1 + maximum: 4294967295 + description: This node's local ASN. + peers: + type: array + minItems: 1 + items: + type: object + required: [address, asn] + properties: + address: + type: string + description: Peer IP (IPv6 or IPv4). + asn: + type: integer + format: int64 + minimum: 1 + maximum: 4294967295 + additionalPrinterColumns: + - name: ASN + type: integer + jsonPath: .spec.bgp.asn + - name: CIDR6 + type: string + jsonPath: .spec.cidr6 + - name: CIDR4 + type: string + jsonPath: .spec.cidr4 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: flock-agent + namespace: kube-system +--- +# M1 RBAC: empty. The agent does not yet read any Kubernetes objects. +# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: flock-agent +rules: [] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: flock-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: flock-agent +subjects: + - kind: ServiceAccount + name: flock-agent + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: flock-agent + namespace: kube-system + labels: + app: flock-agent +spec: + selector: + matchLabels: + app: flock-agent + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + app: flock-agent + spec: + serviceAccountName: flock-agent + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + priorityClassName: system-node-critical + # M1: opt-in per-node via this label. Remove the nodeSelector when + # ready to roll flock to all nodes. + nodeSelector: + flock.fritzlab.net/agent: "" + # Tolerate the cni-test taint applied during migration. + tolerations: + - key: fritzlab.net/cni-test + operator: Equal + value: "true" + effect: NoSchedule + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node.kubernetes.io/not-ready + operator: Exists + effect: NoExecute + - key: node.kubernetes.io/unreachable + operator: Exists + effect: NoExecute + containers: + - name: flock-agent + image: code.fritzlab.net/fritzlab/flock:latest + imagePullPolicy: Always + args: + - --node=$(NODE_NAME) + - --state=/var/lib/flock/allocations.json + - --socket=/run/flock/flock.sock + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink. + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + volumeMounts: + - name: lib-flock + mountPath: /var/lib/flock + - name: run-flock + mountPath: /run/flock + resources: + requests: + cpu: 25m + memory: 32Mi + limits: + memory: 128Mi + volumes: + - name: lib-flock + hostPath: + path: /var/lib/flock + type: DirectoryOrCreate + - name: run-flock + hostPath: + path: /run/flock + type: DirectoryOrCreate + imagePullSecrets: + - name: code-fritzlab-net diff --git a/deploy/rbac/serviceaccount.yaml b/deploy/rbac/serviceaccount.yaml new file mode 100644 index 0000000..3baf96c --- /dev/null +++ b/deploy/rbac/serviceaccount.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: flock-agent + namespace: kube-system +--- +# M1 RBAC: empty. The agent does not yet read any Kubernetes objects. +# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: flock-agent +rules: [] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: flock-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: flock-agent +subjects: + - kind: ServiceAccount + name: flock-agent + namespace: kube-system diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ba8007e --- /dev/null +++ b/go.mod @@ -0,0 +1,10 @@ +module code.fritzlab.net/fritzlab/flock + +go 1.26.1 + +require github.com/containernetworking/cni v1.3.0 + +require ( + github.com/vishvananda/netns v0.0.4 // indirect + golang.org/x/sys v0.23.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d84f326 --- /dev/null +++ b/go.sum @@ -0,0 +1,28 @@ +github.com/containernetworking/cni v1.3.0 h1:v6EpN8RznAZj9765HhXQrtXgX+ECGebEYEmnuFjskwo= +github.com/containernetworking/cni v1.3.0/go.mod h1:Bs8glZjjFfGPHMw6hQu82RUgEPNGEaBb9KS5KtNMnJ4= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo= +github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/agent/server.go b/pkg/agent/server.go new file mode 100644 index 0000000..a3038b2 --- /dev/null +++ b/pkg/agent/server.go @@ -0,0 +1,100 @@ +package agent + +import ( + "context" + "fmt" + "log/slog" + "net" + "os" + "path/filepath" +) + +// SocketPath is the unix socket on which flock-agent serves RPCs from the +// CNI plugin. Mirrors pkg/cni.SocketPath; kept as a separate constant so the +// agent package has no import-cycle on the CNI package. +const SocketPath = "/run/flock/flock.sock" + +// Server is the agent's runtime container: state store, kubernetes informers, +// netlink, BIRD, nftables. M1 wires only the state store and a placeholder +// listener so the binary boots and exits cleanly under a context. +type Server struct { + Node string + Store *Store + Logger *slog.Logger + socket string + closeCh chan struct{} +} + +// Config configures NewServer. +type Config struct { + Node string + StatePath string // typically /var/lib/flock/allocations.json + Socket string // typically /run/flock/flock.sock + Logger *slog.Logger +} + +// NewServer constructs a Server. It does NOT start any goroutines; call Run. +func NewServer(cfg Config) (*Server, error) { + if cfg.Node == "" { + return nil, fmt.Errorf("Node must be set") + } + if cfg.StatePath == "" { + cfg.StatePath = "/var/lib/flock/allocations.json" + } + if cfg.Socket == "" { + cfg.Socket = SocketPath + } + if cfg.Logger == nil { + cfg.Logger = slog.Default() + } + if err := os.MkdirAll(filepath.Dir(cfg.StatePath), 0o750); err != nil { + return nil, fmt.Errorf("mkdir state dir: %w", err) + } + store, err := NewStore(cfg.StatePath, cfg.Node) + if err != nil { + return nil, fmt.Errorf("open store: %w", err) + } + return &Server{ + Node: cfg.Node, + Store: store, + Logger: cfg.Logger, + socket: cfg.Socket, + closeCh: make(chan struct{}), + }, nil +} + +// Run starts the agent and blocks until ctx is cancelled. M1 only opens the +// unix listener (proving permissions/path); the RPC handler is a no-op +// returning ENOSYS until M2. +func (s *Server) Run(ctx context.Context) error { + if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil { + return fmt.Errorf("mkdir socket dir: %w", err) + } + _ = os.Remove(s.socket) + l, err := net.Listen("unix", s.socket) + if err != nil { + return fmt.Errorf("listen %s: %w", s.socket, err) + } + defer l.Close() + + s.Logger.Info("flock-agent started", + "node", s.Node, + "socket", s.socket, + "allocations", len(s.Store.Snapshot()), + ) + + // Accept loop: M1 closes every accepted conn immediately. M2 will dispatch. + go func() { + for { + conn, err := l.Accept() + if err != nil { + return // listener closed + } + _ = conn.Close() + } + }() + + <-ctx.Done() + s.Logger.Info("flock-agent stopping") + return nil +} diff --git a/pkg/agent/state.go b/pkg/agent/state.go new file mode 100644 index 0000000..f02ebc6 --- /dev/null +++ b/pkg/agent/state.go @@ -0,0 +1,204 @@ +// Package agent owns the in-process flock-agent runtime: IPAM, netns, state, +// anycast, and NetworkPolicy. This file implements the durable per-node +// allocation file at /var/lib/flock/allocations.json. +package agent + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + "time" +) + +// AllocationState is the lifecycle marker for an entry in allocations.json. +// +// pending — IPAM picked addresses; netlink work may be incomplete. +// On agent startup these are GC'd: addrs/routes/veth removed. +// committed — netlink ops finished; entry is the source of truth. +type AllocationState string + +const ( + StatePending AllocationState = "pending" + StateCommitted AllocationState = "committed" +) + +// Allocation is a single per-pod entry persisted in allocations.json. +type Allocation struct { + ContainerID string `json:"container_id"` + Namespace string `json:"namespace"` + PodName string `json:"pod_name"` + OwnerUID string `json:"owner_uid"` + IP6 string `json:"ip6,omitempty"` + IP4 string `json:"ip4,omitempty"` + Anycast []string `json:"anycast,omitempty"` + State AllocationState `json:"state"` + AllocatedAt time.Time `json:"allocated_at"` +} + +// State is the on-disk file shape. Version is bumped on incompatible changes. +type State struct { + Version int `json:"version"` + Node string `json:"node"` + Allocations []Allocation `json:"allocations"` +} + +const stateVersion = 1 + +// Store is the durable allocation store. +// +// All public methods are safe for concurrent use. They serialize through +// a single mutex so that the on-disk file is always consistent and so that +// the CNI ADD/DEL critical sections (which mutate kernel state alongside the +// file) can rely on snapshot semantics. +type Store struct { + mu sync.Mutex + path string + node string + data State +} + +// NewStore opens (or creates) a per-node store. The directory containing +// `path` must already exist; we do not create it because in production it is +// /var/lib/flock managed by the DaemonSet, not the agent process. +func NewStore(path, node string) (*Store, error) { + s := &Store{path: path, node: node} + if err := s.load(); err != nil { + return nil, err + } + return s, nil +} + +func (s *Store) load() error { + b, err := os.ReadFile(s.path) + if os.IsNotExist(err) { + s.data = State{Version: stateVersion, Node: s.node, Allocations: []Allocation{}} + return nil + } + if err != nil { + return fmt.Errorf("read state: %w", err) + } + if err := json.Unmarshal(b, &s.data); err != nil { + return fmt.Errorf("parse state: %w", err) + } + if s.data.Version != stateVersion { + return fmt.Errorf("state version %d, want %d", s.data.Version, stateVersion) + } + if s.data.Allocations == nil { + s.data.Allocations = []Allocation{} + } + return nil +} + +// flushLocked writes the in-memory state to disk durably: +// +// 1. write to .tmp +// 2. fsync(tmpfd) +// 3. rename to +// 4. fsync(parent dir) — required so the rename survives power loss. +// +// Caller MUST hold s.mu. +func (s *Store) flushLocked() error { + b, err := json.MarshalIndent(s.data, "", " ") + if err != nil { + return fmt.Errorf("marshal state: %w", err) + } + + tmp := s.path + ".tmp" + f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) + if err != nil { + return fmt.Errorf("open tmp: %w", err) + } + if _, err := f.Write(b); err != nil { + f.Close() + os.Remove(tmp) + return fmt.Errorf("write tmp: %w", err) + } + if err := f.Sync(); err != nil { + f.Close() + os.Remove(tmp) + return fmt.Errorf("fsync tmp: %w", err) + } + if err := f.Close(); err != nil { + os.Remove(tmp) + return fmt.Errorf("close tmp: %w", err) + } + if err := os.Rename(tmp, s.path); err != nil { + os.Remove(tmp) + return fmt.Errorf("rename: %w", err) + } + + dir, err := os.Open(filepath.Dir(s.path)) + if err != nil { + return fmt.Errorf("open parent: %w", err) + } + defer dir.Close() + if err := dir.Sync(); err != nil { + return fmt.Errorf("fsync parent: %w", err) + } + return nil +} + +// Get returns the entry for containerID, ok=false if absent. Returned value +// is a copy; mutations do not affect store state. +func (s *Store) Get(containerID string) (Allocation, bool) { + s.mu.Lock() + defer s.mu.Unlock() + for _, a := range s.data.Allocations { + if a.ContainerID == containerID { + return a, true + } + } + return Allocation{}, false +} + +// Upsert inserts or replaces the entry for a.ContainerID and flushes. +func (s *Store) Upsert(a Allocation) error { + s.mu.Lock() + defer s.mu.Unlock() + for i, ex := range s.data.Allocations { + if ex.ContainerID == a.ContainerID { + s.data.Allocations[i] = a + return s.flushLocked() + } + } + s.data.Allocations = append(s.data.Allocations, a) + return s.flushLocked() +} + +// Delete removes the entry for containerID (no-op if absent) and flushes. +func (s *Store) Delete(containerID string) error { + s.mu.Lock() + defer s.mu.Unlock() + for i, a := range s.data.Allocations { + if a.ContainerID == containerID { + s.data.Allocations = append(s.data.Allocations[:i], s.data.Allocations[i+1:]...) + return s.flushLocked() + } + } + return nil +} + +// Snapshot returns a defensive copy of all allocations. +func (s *Store) Snapshot() []Allocation { + s.mu.Lock() + defer s.mu.Unlock() + out := make([]Allocation, len(s.data.Allocations)) + copy(out, s.data.Allocations) + return out +} + +// PendingContainerIDs returns container IDs whose entries are State==pending. +// Used by agent startup GC. +func (s *Store) PendingContainerIDs() []string { + s.mu.Lock() + defer s.mu.Unlock() + var out []string + for _, a := range s.data.Allocations { + if a.State == StatePending { + out = append(out, a.ContainerID) + } + } + return out +} diff --git a/pkg/agent/state_test.go b/pkg/agent/state_test.go new file mode 100644 index 0000000..50a1640 --- /dev/null +++ b/pkg/agent/state_test.go @@ -0,0 +1,125 @@ +package agent + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func newStore(t *testing.T) (*Store, string) { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "allocations.json") + s, err := NewStore(path, "host001") + if err != nil { + t.Fatalf("NewStore: %v", err) + } + return s, path +} + +func TestStore_EmptyOnFirstOpen(t *testing.T) { + s, _ := newStore(t) + if got := len(s.Snapshot()); got != 0 { + t.Fatalf("Snapshot len = %d, want 0", got) + } +} + +func TestStore_UpsertGetDelete(t *testing.T) { + s, path := newStore(t) + a := Allocation{ + ContainerID: "abc", + Namespace: "mail", + PodName: "stalwart-0", + OwnerUID: "uid-1", + IP6: "2602:817:3000:f001::1", + State: StateCommitted, + AllocatedAt: time.Now().UTC().Truncate(time.Second), + } + if err := s.Upsert(a); err != nil { + t.Fatalf("Upsert: %v", err) + } + + got, ok := s.Get("abc") + if !ok || got.PodName != "stalwart-0" { + t.Fatalf("Get after Upsert: ok=%v got=%+v", ok, got) + } + + // Round-trip: a fresh Store reading the same path sees the entry. + s2, err := NewStore(path, "host001") + if err != nil { + t.Fatalf("reopen: %v", err) + } + if got, ok := s2.Get("abc"); !ok || got.IP6 != a.IP6 { + t.Fatalf("reopen Get: ok=%v got=%+v", ok, got) + } + + if err := s.Delete("abc"); err != nil { + t.Fatalf("Delete: %v", err) + } + if _, ok := s.Get("abc"); ok { + t.Fatalf("entry still present after Delete") + } +} + +func TestStore_UpsertReplacesByContainerID(t *testing.T) { + s, _ := newStore(t) + must := func(err error) { + t.Helper() + if err != nil { + t.Fatal(err) + } + } + must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::1", State: StatePending})) + must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::2", State: StateCommitted})) + if got := len(s.Snapshot()); got != 1 { + t.Fatalf("len = %d, want 1 (Upsert should replace)", got) + } + if a, _ := s.Get("abc"); a.IP6 != "::2" || a.State != StateCommitted { + t.Fatalf("Upsert did not replace: %+v", a) + } +} + +func TestStore_PendingContainerIDs(t *testing.T) { + s, _ := newStore(t) + _ = s.Upsert(Allocation{ContainerID: "p1", State: StatePending}) + _ = s.Upsert(Allocation{ContainerID: "c1", State: StateCommitted}) + _ = s.Upsert(Allocation{ContainerID: "p2", State: StatePending}) + + pend := s.PendingContainerIDs() + if len(pend) != 2 { + t.Fatalf("PendingContainerIDs len = %d, want 2", len(pend)) + } + have := map[string]bool{pend[0]: true, pend[1]: true} + if !have["p1"] || !have["p2"] { + t.Fatalf("PendingContainerIDs = %v, want p1,p2", pend) + } +} + +func TestStore_RejectsWrongVersion(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "allocations.json") + if err := os.WriteFile(path, []byte(`{"version":99,"node":"x","allocations":[]}`), 0o600); err != nil { + t.Fatal(err) + } + if _, err := NewStore(path, "x"); err == nil { + t.Fatalf("expected error on bad version, got nil") + } +} + +func TestStore_AtomicWriteDurability(t *testing.T) { + // We can't simulate a real power-loss in unit tests, but we can verify + // that no .tmp file is left behind after a successful flush, and that + // the rename target is intact. + s, path := newStore(t) + if err := s.Upsert(Allocation{ContainerID: "x", State: StateCommitted}); err != nil { + t.Fatal(err) + } + if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) { + t.Fatalf(".tmp leaked: err=%v", err) + } + b, err := os.ReadFile(path) + if err != nil || len(b) == 0 { + t.Fatalf("final file unreadable: err=%v len=%d", err, len(b)) + } +} diff --git a/pkg/api/v1alpha1/groupversion_info.go b/pkg/api/v1alpha1/groupversion_info.go new file mode 100644 index 0000000..a5b6ea1 --- /dev/null +++ b/pkg/api/v1alpha1/groupversion_info.go @@ -0,0 +1,8 @@ +// Package v1alpha1 contains API Schema definitions for the flock.fritzlab.net +// v1alpha1 API group. +package v1alpha1 + +const ( + GroupName = "flock.fritzlab.net" + Version = "v1alpha1" +) diff --git a/pkg/api/v1alpha1/nodeconfig_types.go b/pkg/api/v1alpha1/nodeconfig_types.go new file mode 100644 index 0000000..258e450 --- /dev/null +++ b/pkg/api/v1alpha1/nodeconfig_types.go @@ -0,0 +1,54 @@ +package v1alpha1 + +// NodeConfigSpec is the operator-written desired state for a single node. +// +// The agent reads this on startup and via informer for live updates. There is +// no controller and no auto-allocation — purely declarative input. +type NodeConfigSpec struct { + // CIDR6 is the set of IPv6 CIDRs this node owns and advertises as BGP + // aggregates. Pod IPv6 addresses are allocated from these. + CIDR6 []string `json:"cidr6,omitempty"` + + // CIDR4 is the set of IPv4 CIDRs this node owns and advertises as BGP + // aggregates. Pod IPv4 addresses are allocated from these. + CIDR4 []string `json:"cidr4,omitempty"` + + // BGP configures the BGP sessions this node establishes upstream. + BGP BGPSpec `json:"bgp"` +} + +type BGPSpec struct { + // ASN is this node's local autonomous system number. + ASN uint32 `json:"asn"` + + // Peers lists upstream BGP peers (typically the rack/site router). + Peers []BGPPeer `json:"peers"` +} + +type BGPPeer struct { + // Address is the peer's IP (IPv6 or IPv4). + Address string `json:"address"` + // ASN is the peer's autonomous system number. + ASN uint32 `json:"asn"` +} + +// NodeConfig is the Schema for the nodeconfigs API. +type NodeConfig struct { + TypeMeta `json:",inline"` + ObjectMeta `json:"metadata,omitempty"` + + Spec NodeConfigSpec `json:"spec,omitempty"` +} + +// TypeMeta and ObjectMeta are minimal stand-ins so this package can be used +// without dragging in k8s.io/apimachinery during the M1 scaffold. They will be +// replaced by metav1.TypeMeta / metav1.ObjectMeta when the agent wires up +// controller-runtime in M2. +type TypeMeta struct { + Kind string `json:"kind,omitempty"` + APIVersion string `json:"apiVersion,omitempty"` +} + +type ObjectMeta struct { + Name string `json:"name,omitempty"` +} diff --git a/pkg/cni/plugin.go b/pkg/cni/plugin.go new file mode 100644 index 0000000..68c73bb --- /dev/null +++ b/pkg/cni/plugin.go @@ -0,0 +1,37 @@ +// Package cni hosts the CNI plugin entry-points. The plugin binary is short- +// lived: it is invoked by kubelet, talks to flock-agent over a unix socket, +// and exits. All real work happens in the agent. +package cni + +import ( + "errors" + + "github.com/containernetworking/cni/pkg/skel" + "github.com/containernetworking/cni/pkg/types" + current "github.com/containernetworking/cni/pkg/types/100" +) + +// SocketPath is the unix socket exposed by flock-agent. +const SocketPath = "/run/flock/flock.sock" + +var errNotImplemented = errors.New("flock: ADD/DEL/CHECK not implemented in M1 scaffold") + +// CmdAdd is invoked by kubelet when a pod sandbox is created. +func CmdAdd(args *skel.CmdArgs) error { + // M2: dial SocketPath, send ADD RPC, return CNI result. + _ = args + _ = current.ImplementedSpecVersion + return types.NewError(types.ErrInternal, "flock-add", errNotImplemented.Error()) +} + +// CmdDel is invoked by kubelet when a pod sandbox is torn down. +func CmdDel(args *skel.CmdArgs) error { + _ = args + return types.NewError(types.ErrInternal, "flock-del", errNotImplemented.Error()) +} + +// CmdCheck verifies that the live netns matches the persisted allocation. +func CmdCheck(args *skel.CmdArgs) error { + _ = args + return types.NewError(types.ErrInternal, "flock-check", errNotImplemented.Error()) +} diff --git a/pkg/cni/rpc_client.go b/pkg/cni/rpc_client.go new file mode 100644 index 0000000..0857ab9 --- /dev/null +++ b/pkg/cni/rpc_client.go @@ -0,0 +1,5 @@ +package cni + +// rpc_client.go will hold the JSON-over-unix-socket client used by the CNI +// plugin to call into flock-agent. Stub for M1; implementation lands in M2 +// alongside the agent's RPC server. diff --git a/pkg/embed/suffix.go b/pkg/embed/suffix.go new file mode 100644 index 0000000..7d34cb0 --- /dev/null +++ b/pkg/embed/suffix.go @@ -0,0 +1,174 @@ +// Package embed implements ip-algo: deterministic embedding of pod identity +// (namespace, pod name, image digest) into the host portion of an IPv6 +// address. The mapping is operator-friendly cosmetics — NOT a security +// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec. +package embed + +import ( + "encoding/hex" + "fmt" + "hash/fnv" + "net" + "strings" +) + +// Field is one of the supported identity fields. +type Field string + +const ( + FieldNamespace Field = "namespace" + FieldPod Field = "pod" + FieldImage Field = "image" +) + +// Values carries the inputs for one embedding call. Image holds the SHA-256 +// manifest digest as 64 hex chars when known; otherwise pass the containerID +// in ImageFallback and we'll FNV-1a-64 it. +type Values struct { + Namespace string + Pod string + Image string // 64-char hex sha256 manifest digest, or empty + ImageFallback string // typically containerID, used when Image=="". +} + +// MaxFieldNibbles is the largest single-field width supported by this +// implementation. 16 nibbles = 64 bits = the output width of FNV-1a-64. +// Wider fields would require a wider hash; the design doc tolerates this +// because real deployments use /64 nodes (15 field nibbles total). +const MaxFieldNibbles = 16 + +// Embed returns the IPv6 address inside `network` whose host portion encodes +// `fields` (in the given order) followed by the random nibble nNibble. +// +// `network` must be an IPv6 prefix whose length is a multiple of 4 (so the +// host portion is a whole number of nibbles). +// +// `fields` must be non-empty. For a fully-random IID, the caller should pick +// random bytes directly rather than calling Embed. +// +// nNibble is the random "instance" nibble; only the low 4 bits are used. +// Callers regenerate it on collision (see allocations.json). +func Embed(network *net.IPNet, fields []Field, vals Values, nNibble byte) (net.IP, error) { + ones, bits := network.Mask.Size() + if bits != 128 { + return nil, fmt.Errorf("network is not IPv6: %s", network) + } + if ones%4 != 0 { + return nil, fmt.Errorf("prefix length %d is not a multiple of 4", ones) + } + hostNibbles := (128 - ones) / 4 + if hostNibbles < 2 { + return nil, fmt.Errorf("prefix /%d leaves %d host nibble(s); need at least 2 (one field + N)", ones, hostNibbles) + } + if len(fields) == 0 { + return nil, fmt.Errorf("no fields specified; caller should generate random IID directly") + } + + fieldNibbles := hostNibbles - 1 + dist, err := distribute(fieldNibbles, len(fields)) + if err != nil { + return nil, err + } + + addr := make(net.IP, net.IPv6len) + copy(addr, network.IP.To16()) + + // Stream nibbles left-to-right starting at the first host nibble. + startNibble := ones / 4 + pos := 0 + for i, f := range fields { + n := dist[i] + v, err := fieldValue(f, vals, n*4) + if err != nil { + return nil, err + } + // Write `n` nibbles, most-significant first. + for j := n - 1; j >= 0; j-- { + nb := byte((v >> uint(j*4)) & 0xF) + writeNibble(addr, startNibble+pos, nb) + pos++ + } + } + writeNibble(addr, startNibble+pos, nNibble&0x0F) + + return addr, nil +} + +// distribute splits `total` nibbles across `k` fields as evenly as possible, +// giving any remainder to earlier fields one extra nibble at a time. +func distribute(total, k int) ([]int, error) { + if k <= 0 { + return nil, fmt.Errorf("k must be > 0") + } + if total < k { + return nil, fmt.Errorf("not enough host nibbles (%d) for %d fields", total, k) + } + out := make([]int, k) + base := total / k + rem := total % k + for i := range out { + out[i] = base + if i < rem { + out[i]++ + } + if out[i] > MaxFieldNibbles { + return nil, fmt.Errorf("field %d would need %d nibbles; max supported is %d", i, out[i], MaxFieldNibbles) + } + } + return out, nil +} + +// fieldValue returns the top `bits` bits of the hash-or-digest for `f`, +// right-aligned in the returned uint64. +func fieldValue(f Field, v Values, bits int) (uint64, error) { + if bits <= 0 || bits > 64 { + return 0, fmt.Errorf("bad field bits %d (1..64)", bits) + } + switch f { + case FieldNamespace: + return topBitsFNV(v.Namespace, bits), nil + case FieldPod: + return topBitsFNV(v.Pod, bits), nil + case FieldImage: + if v.Image != "" { + return topBitsHex(v.Image, bits) + } + return topBitsFNV(v.ImageFallback, bits), nil + default: + return 0, fmt.Errorf("unknown field %q", f) + } +} + +func topBitsFNV(s string, bits int) uint64 { + h := fnv.New64a() + _, _ = h.Write([]byte(s)) + return h.Sum64() >> uint(64-bits) +} + +// topBitsHex parses a leading sha256-digest-style hex string and returns +// its top `bits` bits, right-aligned. Accepts an optional "sha256:" prefix. +func topBitsHex(s string, bits int) (uint64, error) { + s = strings.TrimPrefix(s, "sha256:") + if len(s) < 16 { // need at least 8 bytes / 64 bits to right-shift + return 0, fmt.Errorf("image digest too short: %d hex chars", len(s)) + } + b, err := hex.DecodeString(s[:16]) + if err != nil { + return 0, fmt.Errorf("image digest not hex: %w", err) + } + var v uint64 + for _, x := range b { + v = (v << 8) | uint64(x) + } + return v >> uint(64-bits), nil +} + +// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0). +func writeNibble(addr net.IP, nibIdx int, nb byte) { + bytePos := nibIdx / 2 + if nibIdx%2 == 0 { + addr[bytePos] = (addr[bytePos] & 0x0F) | (nb << 4) + } else { + addr[bytePos] = (addr[bytePos] & 0xF0) | (nb & 0x0F) + } +} diff --git a/pkg/embed/suffix_test.go b/pkg/embed/suffix_test.go new file mode 100644 index 0000000..6106f36 --- /dev/null +++ b/pkg/embed/suffix_test.go @@ -0,0 +1,153 @@ +package embed + +import ( + "net" + "testing" +) + +func mustCIDR(t *testing.T, s string) *net.IPNet { + t.Helper() + _, n, err := net.ParseCIDR(s) + if err != nil { + t.Fatalf("ParseCIDR(%q): %v", s, err) + } + return n +} + +func TestDistribute(t *testing.T) { + cases := []struct { + total, k int + want []int + }{ + // from the doc table + {19, 1, []int{19}}, // /48 1 field — would exceed MaxFieldNibbles, see error test below + {19, 2, []int{10, 9}}, + {19, 3, []int{7, 6, 6}}, + {17, 1, []int{17}}, + {17, 2, []int{9, 8}}, + {17, 3, []int{6, 6, 5}}, + {15, 1, []int{15}}, + {15, 2, []int{8, 7}}, + {15, 3, []int{5, 5, 5}}, + {11, 1, []int{11}}, + {11, 2, []int{6, 5}}, + {11, 3, []int{4, 4, 3}}, + {7, 1, []int{7}}, + {7, 2, []int{4, 3}}, + {7, 3, []int{3, 2, 2}}, + } + for _, c := range cases { + got, err := distribute(c.total, c.k) + if c.total > MaxFieldNibbles && c.k == 1 { + if err == nil { + t.Errorf("distribute(%d,%d): expected MaxFieldNibbles error", c.total, c.k) + } + continue + } + if err != nil { + t.Errorf("distribute(%d,%d): %v", c.total, c.k, err) + continue + } + if !equal(got, c.want) { + t.Errorf("distribute(%d,%d) = %v, want %v", c.total, c.k, got, c.want) + } + } +} + +func equal(a, b []int) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func TestEmbed_Slash64Deterministic(t *testing.T) { + // /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID. + net64 := mustCIDR(t, "2602:817:3000:f001::/64") + addr, err := Embed(net64, + []Field{FieldNamespace, FieldPod, FieldImage}, + Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"}, + 0xe, + ) + if err != nil { + t.Fatalf("Embed: %v", err) + } + // Property: same inputs → same output (twice). + addr2, err := Embed(net64, + []Field{FieldNamespace, FieldPod, FieldImage}, + Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"}, + 0xe, + ) + if err != nil { + t.Fatal(err) + } + if !addr.Equal(addr2) { + t.Fatalf("non-deterministic: %s vs %s", addr, addr2) + } + // Property: prefix preserved. + if !net64.Contains(addr) { + t.Fatalf("addr %s outside network %s", addr, net64) + } + // Property: last nibble is exactly N. + if got := addr[len(addr)-1] & 0x0F; got != 0xe { + t.Fatalf("last nibble = %x, want e", got) + } +} + +func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) { + net64 := mustCIDR(t, "2602:817:3000:f001::/64") + a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0) + b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0) + if a.Equal(b) { + t.Fatalf("different namespace produced identical IID: %s", a) + } +} + +func TestEmbed_NRandomizesLowNibble(t *testing.T) { + net64 := mustCIDR(t, "2602:817:3000:f001::/64") + a, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x1) + b, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x2) + if a.Equal(b) { + t.Fatalf("changing N did not change address") + } + // And the only difference should be the last nibble. + if a[15]>>4 != b[15]>>4 { + t.Fatalf("upper nibble of last byte changed unexpectedly: %x vs %x", a[15], b[15]) + } +} + +func TestEmbed_RejectsBadInputs(t *testing.T) { + net64 := mustCIDR(t, "2602:817:3000:f001::/64") + if _, err := Embed(net64, nil, Values{}, 0); err == nil { + t.Fatalf("expected error for empty fields") + } + odd := &net.IPNet{IP: net.ParseIP("2602:817:3000::"), Mask: net.CIDRMask(63, 128)} + if _, err := Embed(odd, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil { + t.Fatalf("expected error for /63 (not nibble-aligned)") + } + v4 := &net.IPNet{IP: net.ParseIP("10.0.0.0").To4(), Mask: net.CIDRMask(8, 32)} + if _, err := Embed(v4, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil { + t.Fatalf("expected error for IPv4 network") + } +} + +func TestEmbed_ImageDigestVsFallback(t *testing.T) { + net64 := mustCIDR(t, "2602:817:3000:f001::/64") + digest := "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011" + a, err := Embed(net64, []Field{FieldImage}, Values{Image: digest}, 0) + if err != nil { + t.Fatalf("Embed digest: %v", err) + } + b, err := Embed(net64, []Field{FieldImage}, Values{ImageFallback: "ctr-xyz"}, 0) + if err != nil { + t.Fatalf("Embed fallback: %v", err) + } + if a.Equal(b) { + t.Fatalf("digest and fallback produced same IID") + } +}