flock M1 scaffold: CNI plugin + agent + NodeConfig CRD
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
- cmd/flock + cmd/flock-agent: build cleanly; CNI ADD/DEL/CHECK return ErrInternal stubs until M2; agent boots, opens unix socket, logs JSON. - pkg/agent/state.go: durable allocations.json (atomic write + fsync + parent fsync); pending/committed lifecycle. Tests cover round-trip, replace-by-cid, version mismatch, no-leak-on-tmp. - pkg/embed/suffix.go: ip-algo IID embedding. Tests cover the /48-/96 nibble distribution table from the design doc, determinism, prefix preservation, N-nibble isolation, digest-vs-fallback divergence. - pkg/api/v1alpha1: minimal NodeConfig types (no controller-runtime yet). - deploy/: NodeConfig CRD, empty ServiceAccount/ClusterRole, DaemonSet pinned to flock.fritzlab.net/agent="" label so it only runs on opted-in nodes. - .gitea/workflows/main.yaml + Dockerfile: build + push to code.fritzlab.net/fritzlab/flock; runs go test in CI. Design doc: dfritzlab/k8s-manager/dfritz-cni.md. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
.git
|
||||
.gitea
|
||||
deploy
|
||||
hack
|
||||
test
|
||||
README.md
|
||||
LICENSE
|
||||
*.md
|
||||
@@ -0,0 +1,60 @@
|
||||
name: Build flock Image
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: fritzlab
|
||||
steps:
|
||||
- name: Check out repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
docker run --rm -v "$PWD:/src" -w /src golang:1.26-alpine \
|
||||
sh -c "go test ./..."
|
||||
|
||||
- name: Log in to Gitea registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: code.fritzlab.net
|
||||
username: ci-bot
|
||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Extract Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: code.fritzlab.net/fritzlab/flock
|
||||
tags: |
|
||||
type=raw,value=latest
|
||||
type=raw,value=${{ github.run_number }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
provenance: false
|
||||
build-args: |
|
||||
GIT_SHA=${{ github.sha }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
network: host
|
||||
|
||||
- name: Smoke-test image
|
||||
run: |
|
||||
docker run --rm code.fritzlab.net/fritzlab/flock:${{ github.run_number }} --help || true
|
||||
docker run --rm --entrypoint /usr/local/bin/flock \
|
||||
code.fritzlab.net/fritzlab/flock:${{ github.run_number }} || true
|
||||
|
||||
- name: Clean up old image tags
|
||||
run: |
|
||||
tea login add --name ci --url https://code.fritzlab.net --token '${{ secrets.CI_BOT_TOKEN }}' --no-version-check
|
||||
tea api '/packages/fritzlab?type=container' \
|
||||
| jq -r '.[] | select(.name=="flock") | select(.version | test("^[0-9]+$")) | .version' \
|
||||
| sort -n | head -n -3 \
|
||||
| while read tag; do
|
||||
echo "deleting flock:$tag"
|
||||
tea api -X DELETE "/packages/fritzlab/container/flock/$tag"
|
||||
done
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
FROM golang:1.26-alpine AS build
|
||||
|
||||
WORKDIR /src
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY cmd/ ./cmd/
|
||||
COPY pkg/ ./pkg/
|
||||
|
||||
ARG GIT_SHA=unknown
|
||||
RUN CGO_ENABLED=0 go build -trimpath \
|
||||
-ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \
|
||||
-o /out/flock ./cmd/flock \
|
||||
&& CGO_ENABLED=0 go build -trimpath \
|
||||
-ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \
|
||||
-o /out/flock-agent ./cmd/flock-agent
|
||||
|
||||
FROM alpine:3.21
|
||||
RUN apk add --no-cache iproute2 ca-certificates
|
||||
COPY --from=build /out/flock /usr/local/bin/flock
|
||||
COPY --from=build /out/flock-agent /usr/local/bin/flock-agent
|
||||
ENTRYPOINT ["/usr/local/bin/flock-agent"]
|
||||
@@ -0,0 +1,15 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@@ -0,0 +1,22 @@
|
||||
# flock
|
||||
|
||||
Kubernetes CNI for sjc001. Per-pod IPv4 opt-in, IID embedding, Ready-gated anycast via BGP.
|
||||
|
||||
Design doc: `k8s-manager/dfritz-cni.md` (in the operator's k8s-manager repo).
|
||||
|
||||
Status: M1 scaffold. Not functional. See milestones table in the design doc.
|
||||
|
||||
## Layout
|
||||
|
||||
- `cmd/flock` — CNI plugin binary (kubelet-invoked)
|
||||
- `cmd/flock-agent` — DaemonSet binary
|
||||
- `pkg/api/v1alpha1` — `NodeConfig` CRD types
|
||||
- `pkg/cni` — CNI plugin internals + RPC client
|
||||
- `pkg/agent` — agent server, IPAM, state file, anycast, NetworkPolicy
|
||||
- `pkg/embed` — `ip-algo` IID embedding (pure)
|
||||
- `pkg/routing/{bird,ospf}` — routing backends
|
||||
- `deploy/` — CRDs, RBAC, DaemonSet manifests
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0.
|
||||
@@ -0,0 +1,50 @@
|
||||
// Command flock-agent is the per-node DaemonSet binary. It owns IPAM, netns
|
||||
// programming, BIRD config, and nftables. M1 boots only the state store and a
|
||||
// placeholder unix listener.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"code.fritzlab.net/fritzlab/flock/pkg/agent"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var (
|
||||
node = flag.String("node", os.Getenv("NODE_NAME"), "node name (defaults to $NODE_NAME)")
|
||||
statePath = flag.String("state", "/var/lib/flock/allocations.json", "path to allocations.json")
|
||||
socket = flag.String("socket", agent.SocketPath, "unix socket for CNI RPC")
|
||||
)
|
||||
flag.Parse()
|
||||
|
||||
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||
|
||||
if *node == "" {
|
||||
logger.Error("--node or $NODE_NAME is required")
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
srv, err := agent.NewServer(agent.Config{
|
||||
Node: *node,
|
||||
StatePath: *statePath,
|
||||
Socket: *socket,
|
||||
Logger: logger,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Error("init server", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer cancel()
|
||||
|
||||
if err := srv.Run(ctx); err != nil {
|
||||
logger.Error("run", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
// Command flock is the kubelet-invoked CNI plugin binary. It is short-lived:
|
||||
// kubelet execs it for ADD/DEL/CHECK, the binary forwards to flock-agent over
|
||||
// a unix socket, and exits.
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/containernetworking/cni/pkg/skel"
|
||||
"github.com/containernetworking/cni/pkg/version"
|
||||
|
||||
"code.fritzlab.net/fritzlab/flock/pkg/cni"
|
||||
)
|
||||
|
||||
func main() {
|
||||
skel.PluginMainFuncs(skel.CNIFuncs{
|
||||
Add: cni.CmdAdd,
|
||||
Del: cni.CmdDel,
|
||||
Check: cni.CmdCheck,
|
||||
}, version.All, "flock")
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: nodeconfigs.flock.fritzlab.net
|
||||
spec:
|
||||
group: flock.fritzlab.net
|
||||
scope: Cluster
|
||||
names:
|
||||
kind: NodeConfig
|
||||
listKind: NodeConfigList
|
||||
singular: nodeconfig
|
||||
plural: nodeconfigs
|
||||
shortNames:
|
||||
- fnc
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
required: [spec]
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
required: [bgp]
|
||||
properties:
|
||||
cidr6:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: IPv6 CIDR owned and aggregate-advertised by this node.
|
||||
cidr4:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: IPv4 CIDR owned and aggregate-advertised by this node.
|
||||
bgp:
|
||||
type: object
|
||||
required: [asn, peers]
|
||||
properties:
|
||||
asn:
|
||||
type: integer
|
||||
format: int64
|
||||
minimum: 1
|
||||
maximum: 4294967295
|
||||
description: This node's local ASN.
|
||||
peers:
|
||||
type: array
|
||||
minItems: 1
|
||||
items:
|
||||
type: object
|
||||
required: [address, asn]
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
description: Peer IP (IPv6 or IPv4).
|
||||
asn:
|
||||
type: integer
|
||||
format: int64
|
||||
minimum: 1
|
||||
maximum: 4294967295
|
||||
additionalPrinterColumns:
|
||||
- name: ASN
|
||||
type: integer
|
||||
jsonPath: .spec.bgp.asn
|
||||
- name: CIDR6
|
||||
type: string
|
||||
jsonPath: .spec.cidr6
|
||||
- name: CIDR4
|
||||
type: string
|
||||
jsonPath: .spec.cidr4
|
||||
@@ -0,0 +1,84 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: flock-agent
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: flock-agent
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: flock-agent
|
||||
spec:
|
||||
serviceAccountName: flock-agent
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
priorityClassName: system-node-critical
|
||||
# M1: opt-in per-node via this label. Remove the nodeSelector when
|
||||
# ready to roll flock to all nodes.
|
||||
nodeSelector:
|
||||
flock.fritzlab.net/agent: ""
|
||||
# Tolerate the cni-test taint applied during migration.
|
||||
tolerations:
|
||||
- key: fritzlab.net/cni-test
|
||||
operator: Equal
|
||||
value: "true"
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node.kubernetes.io/not-ready
|
||||
operator: Exists
|
||||
effect: NoExecute
|
||||
- key: node.kubernetes.io/unreachable
|
||||
operator: Exists
|
||||
effect: NoExecute
|
||||
containers:
|
||||
- name: flock-agent
|
||||
image: code.fritzlab.net/fritzlab/flock:latest
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- --node=$(NODE_NAME)
|
||||
- --state=/var/lib/flock/allocations.json
|
||||
- --socket=/run/flock/flock.sock
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
# M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink.
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
volumeMounts:
|
||||
- name: lib-flock
|
||||
mountPath: /var/lib/flock
|
||||
- name: run-flock
|
||||
mountPath: /run/flock
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 32Mi
|
||||
limits:
|
||||
memory: 128Mi
|
||||
volumes:
|
||||
- name: lib-flock
|
||||
hostPath:
|
||||
path: /var/lib/flock
|
||||
type: DirectoryOrCreate
|
||||
- name: run-flock
|
||||
hostPath:
|
||||
path: /run/flock
|
||||
type: DirectoryOrCreate
|
||||
imagePullSecrets:
|
||||
- name: code-fritzlab-net
|
||||
@@ -0,0 +1,184 @@
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: nodeconfigs.flock.fritzlab.net
|
||||
spec:
|
||||
group: flock.fritzlab.net
|
||||
scope: Cluster
|
||||
names:
|
||||
kind: NodeConfig
|
||||
listKind: NodeConfigList
|
||||
singular: nodeconfig
|
||||
plural: nodeconfigs
|
||||
shortNames:
|
||||
- fnc
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
required: [spec]
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
required: [bgp]
|
||||
properties:
|
||||
cidr6:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: IPv6 CIDR owned and aggregate-advertised by this node.
|
||||
cidr4:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: IPv4 CIDR owned and aggregate-advertised by this node.
|
||||
bgp:
|
||||
type: object
|
||||
required: [asn, peers]
|
||||
properties:
|
||||
asn:
|
||||
type: integer
|
||||
format: int64
|
||||
minimum: 1
|
||||
maximum: 4294967295
|
||||
description: This node's local ASN.
|
||||
peers:
|
||||
type: array
|
||||
minItems: 1
|
||||
items:
|
||||
type: object
|
||||
required: [address, asn]
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
description: Peer IP (IPv6 or IPv4).
|
||||
asn:
|
||||
type: integer
|
||||
format: int64
|
||||
minimum: 1
|
||||
maximum: 4294967295
|
||||
additionalPrinterColumns:
|
||||
- name: ASN
|
||||
type: integer
|
||||
jsonPath: .spec.bgp.asn
|
||||
- name: CIDR6
|
||||
type: string
|
||||
jsonPath: .spec.cidr6
|
||||
- name: CIDR4
|
||||
type: string
|
||||
jsonPath: .spec.cidr4
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
---
|
||||
# M1 RBAC: empty. The agent does not yet read any Kubernetes objects.
|
||||
# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here.
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: flock-agent
|
||||
rules: []
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: flock-agent
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: flock-agent
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: flock-agent
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: flock-agent
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: flock-agent
|
||||
spec:
|
||||
serviceAccountName: flock-agent
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
priorityClassName: system-node-critical
|
||||
# M1: opt-in per-node via this label. Remove the nodeSelector when
|
||||
# ready to roll flock to all nodes.
|
||||
nodeSelector:
|
||||
flock.fritzlab.net/agent: ""
|
||||
# Tolerate the cni-test taint applied during migration.
|
||||
tolerations:
|
||||
- key: fritzlab.net/cni-test
|
||||
operator: Equal
|
||||
value: "true"
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node.kubernetes.io/not-ready
|
||||
operator: Exists
|
||||
effect: NoExecute
|
||||
- key: node.kubernetes.io/unreachable
|
||||
operator: Exists
|
||||
effect: NoExecute
|
||||
containers:
|
||||
- name: flock-agent
|
||||
image: code.fritzlab.net/fritzlab/flock:latest
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- --node=$(NODE_NAME)
|
||||
- --state=/var/lib/flock/allocations.json
|
||||
- --socket=/run/flock/flock.sock
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
# M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink.
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
volumeMounts:
|
||||
- name: lib-flock
|
||||
mountPath: /var/lib/flock
|
||||
- name: run-flock
|
||||
mountPath: /run/flock
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 32Mi
|
||||
limits:
|
||||
memory: 128Mi
|
||||
volumes:
|
||||
- name: lib-flock
|
||||
hostPath:
|
||||
path: /var/lib/flock
|
||||
type: DirectoryOrCreate
|
||||
- name: run-flock
|
||||
hostPath:
|
||||
path: /run/flock
|
||||
type: DirectoryOrCreate
|
||||
imagePullSecrets:
|
||||
- name: code-fritzlab-net
|
||||
@@ -0,0 +1,26 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
---
|
||||
# M1 RBAC: empty. The agent does not yet read any Kubernetes objects.
|
||||
# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here.
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: flock-agent
|
||||
rules: []
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: flock-agent
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: flock-agent
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: flock-agent
|
||||
namespace: kube-system
|
||||
@@ -0,0 +1,10 @@
|
||||
module code.fritzlab.net/fritzlab/flock
|
||||
|
||||
go 1.26.1
|
||||
|
||||
require github.com/containernetworking/cni v1.3.0
|
||||
|
||||
require (
|
||||
github.com/vishvananda/netns v0.0.4 // indirect
|
||||
golang.org/x/sys v0.23.0 // indirect
|
||||
)
|
||||
@@ -0,0 +1,28 @@
|
||||
github.com/containernetworking/cni v1.3.0 h1:v6EpN8RznAZj9765HhXQrtXgX+ECGebEYEmnuFjskwo=
|
||||
github.com/containernetworking/cni v1.3.0/go.mod h1:Bs8glZjjFfGPHMw6hQu82RUgEPNGEaBb9KS5KtNMnJ4=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k=
|
||||
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
|
||||
github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo=
|
||||
github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
|
||||
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
|
||||
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
|
||||
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
|
||||
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
|
||||
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
|
||||
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
|
||||
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
|
||||
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
@@ -0,0 +1,100 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// SocketPath is the unix socket on which flock-agent serves RPCs from the
|
||||
// CNI plugin. Mirrors pkg/cni.SocketPath; kept as a separate constant so the
|
||||
// agent package has no import-cycle on the CNI package.
|
||||
const SocketPath = "/run/flock/flock.sock"
|
||||
|
||||
// Server is the agent's runtime container: state store, kubernetes informers,
|
||||
// netlink, BIRD, nftables. M1 wires only the state store and a placeholder
|
||||
// listener so the binary boots and exits cleanly under a context.
|
||||
type Server struct {
|
||||
Node string
|
||||
Store *Store
|
||||
Logger *slog.Logger
|
||||
socket string
|
||||
closeCh chan struct{}
|
||||
}
|
||||
|
||||
// Config configures NewServer.
|
||||
type Config struct {
|
||||
Node string
|
||||
StatePath string // typically /var/lib/flock/allocations.json
|
||||
Socket string // typically /run/flock/flock.sock
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
// NewServer constructs a Server. It does NOT start any goroutines; call Run.
|
||||
func NewServer(cfg Config) (*Server, error) {
|
||||
if cfg.Node == "" {
|
||||
return nil, fmt.Errorf("Node must be set")
|
||||
}
|
||||
if cfg.StatePath == "" {
|
||||
cfg.StatePath = "/var/lib/flock/allocations.json"
|
||||
}
|
||||
if cfg.Socket == "" {
|
||||
cfg.Socket = SocketPath
|
||||
}
|
||||
if cfg.Logger == nil {
|
||||
cfg.Logger = slog.Default()
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(cfg.StatePath), 0o750); err != nil {
|
||||
return nil, fmt.Errorf("mkdir state dir: %w", err)
|
||||
}
|
||||
store, err := NewStore(cfg.StatePath, cfg.Node)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open store: %w", err)
|
||||
}
|
||||
return &Server{
|
||||
Node: cfg.Node,
|
||||
Store: store,
|
||||
Logger: cfg.Logger,
|
||||
socket: cfg.Socket,
|
||||
closeCh: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the agent and blocks until ctx is cancelled. M1 only opens the
|
||||
// unix listener (proving permissions/path); the RPC handler is a no-op
|
||||
// returning ENOSYS until M2.
|
||||
func (s *Server) Run(ctx context.Context) error {
|
||||
if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil {
|
||||
return fmt.Errorf("mkdir socket dir: %w", err)
|
||||
}
|
||||
_ = os.Remove(s.socket)
|
||||
l, err := net.Listen("unix", s.socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("listen %s: %w", s.socket, err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
s.Logger.Info("flock-agent started",
|
||||
"node", s.Node,
|
||||
"socket", s.socket,
|
||||
"allocations", len(s.Store.Snapshot()),
|
||||
)
|
||||
|
||||
// Accept loop: M1 closes every accepted conn immediately. M2 will dispatch.
|
||||
go func() {
|
||||
for {
|
||||
conn, err := l.Accept()
|
||||
if err != nil {
|
||||
return // listener closed
|
||||
}
|
||||
_ = conn.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
<-ctx.Done()
|
||||
s.Logger.Info("flock-agent stopping")
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
// Package agent owns the in-process flock-agent runtime: IPAM, netns, state,
|
||||
// anycast, and NetworkPolicy. This file implements the durable per-node
|
||||
// allocation file at /var/lib/flock/allocations.json.
|
||||
package agent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AllocationState is the lifecycle marker for an entry in allocations.json.
|
||||
//
|
||||
// pending — IPAM picked addresses; netlink work may be incomplete.
|
||||
// On agent startup these are GC'd: addrs/routes/veth removed.
|
||||
// committed — netlink ops finished; entry is the source of truth.
|
||||
type AllocationState string
|
||||
|
||||
const (
|
||||
StatePending AllocationState = "pending"
|
||||
StateCommitted AllocationState = "committed"
|
||||
)
|
||||
|
||||
// Allocation is a single per-pod entry persisted in allocations.json.
|
||||
type Allocation struct {
|
||||
ContainerID string `json:"container_id"`
|
||||
Namespace string `json:"namespace"`
|
||||
PodName string `json:"pod_name"`
|
||||
OwnerUID string `json:"owner_uid"`
|
||||
IP6 string `json:"ip6,omitempty"`
|
||||
IP4 string `json:"ip4,omitempty"`
|
||||
Anycast []string `json:"anycast,omitempty"`
|
||||
State AllocationState `json:"state"`
|
||||
AllocatedAt time.Time `json:"allocated_at"`
|
||||
}
|
||||
|
||||
// State is the on-disk file shape. Version is bumped on incompatible changes.
|
||||
type State struct {
|
||||
Version int `json:"version"`
|
||||
Node string `json:"node"`
|
||||
Allocations []Allocation `json:"allocations"`
|
||||
}
|
||||
|
||||
const stateVersion = 1
|
||||
|
||||
// Store is the durable allocation store.
|
||||
//
|
||||
// All public methods are safe for concurrent use. They serialize through
|
||||
// a single mutex so that the on-disk file is always consistent and so that
|
||||
// the CNI ADD/DEL critical sections (which mutate kernel state alongside the
|
||||
// file) can rely on snapshot semantics.
|
||||
type Store struct {
|
||||
mu sync.Mutex
|
||||
path string
|
||||
node string
|
||||
data State
|
||||
}
|
||||
|
||||
// NewStore opens (or creates) a per-node store. The directory containing
|
||||
// `path` must already exist; we do not create it because in production it is
|
||||
// /var/lib/flock managed by the DaemonSet, not the agent process.
|
||||
func NewStore(path, node string) (*Store, error) {
|
||||
s := &Store{path: path, node: node}
|
||||
if err := s.load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *Store) load() error {
|
||||
b, err := os.ReadFile(s.path)
|
||||
if os.IsNotExist(err) {
|
||||
s.data = State{Version: stateVersion, Node: s.node, Allocations: []Allocation{}}
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("read state: %w", err)
|
||||
}
|
||||
if err := json.Unmarshal(b, &s.data); err != nil {
|
||||
return fmt.Errorf("parse state: %w", err)
|
||||
}
|
||||
if s.data.Version != stateVersion {
|
||||
return fmt.Errorf("state version %d, want %d", s.data.Version, stateVersion)
|
||||
}
|
||||
if s.data.Allocations == nil {
|
||||
s.data.Allocations = []Allocation{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushLocked writes the in-memory state to disk durably:
|
||||
//
|
||||
// 1. write to <path>.tmp
|
||||
// 2. fsync(tmpfd)
|
||||
// 3. rename to <path>
|
||||
// 4. fsync(parent dir) — required so the rename survives power loss.
|
||||
//
|
||||
// Caller MUST hold s.mu.
|
||||
func (s *Store) flushLocked() error {
|
||||
b, err := json.MarshalIndent(s.data, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal state: %w", err)
|
||||
}
|
||||
|
||||
tmp := s.path + ".tmp"
|
||||
f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open tmp: %w", err)
|
||||
}
|
||||
if _, err := f.Write(b); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("write tmp: %w", err)
|
||||
}
|
||||
if err := f.Sync(); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("fsync tmp: %w", err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("close tmp: %w", err)
|
||||
}
|
||||
if err := os.Rename(tmp, s.path); err != nil {
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("rename: %w", err)
|
||||
}
|
||||
|
||||
dir, err := os.Open(filepath.Dir(s.path))
|
||||
if err != nil {
|
||||
return fmt.Errorf("open parent: %w", err)
|
||||
}
|
||||
defer dir.Close()
|
||||
if err := dir.Sync(); err != nil {
|
||||
return fmt.Errorf("fsync parent: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get returns the entry for containerID, ok=false if absent. Returned value
|
||||
// is a copy; mutations do not affect store state.
|
||||
func (s *Store) Get(containerID string) (Allocation, bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for _, a := range s.data.Allocations {
|
||||
if a.ContainerID == containerID {
|
||||
return a, true
|
||||
}
|
||||
}
|
||||
return Allocation{}, false
|
||||
}
|
||||
|
||||
// Upsert inserts or replaces the entry for a.ContainerID and flushes.
|
||||
func (s *Store) Upsert(a Allocation) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for i, ex := range s.data.Allocations {
|
||||
if ex.ContainerID == a.ContainerID {
|
||||
s.data.Allocations[i] = a
|
||||
return s.flushLocked()
|
||||
}
|
||||
}
|
||||
s.data.Allocations = append(s.data.Allocations, a)
|
||||
return s.flushLocked()
|
||||
}
|
||||
|
||||
// Delete removes the entry for containerID (no-op if absent) and flushes.
|
||||
func (s *Store) Delete(containerID string) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for i, a := range s.data.Allocations {
|
||||
if a.ContainerID == containerID {
|
||||
s.data.Allocations = append(s.data.Allocations[:i], s.data.Allocations[i+1:]...)
|
||||
return s.flushLocked()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Snapshot returns a defensive copy of all allocations.
|
||||
func (s *Store) Snapshot() []Allocation {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
out := make([]Allocation, len(s.data.Allocations))
|
||||
copy(out, s.data.Allocations)
|
||||
return out
|
||||
}
|
||||
|
||||
// PendingContainerIDs returns container IDs whose entries are State==pending.
|
||||
// Used by agent startup GC.
|
||||
func (s *Store) PendingContainerIDs() []string {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
var out []string
|
||||
for _, a := range s.data.Allocations {
|
||||
if a.State == StatePending {
|
||||
out = append(out, a.ContainerID)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func newStore(t *testing.T) (*Store, string) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "allocations.json")
|
||||
s, err := NewStore(path, "host001")
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore: %v", err)
|
||||
}
|
||||
return s, path
|
||||
}
|
||||
|
||||
func TestStore_EmptyOnFirstOpen(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
if got := len(s.Snapshot()); got != 0 {
|
||||
t.Fatalf("Snapshot len = %d, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_UpsertGetDelete(t *testing.T) {
|
||||
s, path := newStore(t)
|
||||
a := Allocation{
|
||||
ContainerID: "abc",
|
||||
Namespace: "mail",
|
||||
PodName: "stalwart-0",
|
||||
OwnerUID: "uid-1",
|
||||
IP6: "2602:817:3000:f001::1",
|
||||
State: StateCommitted,
|
||||
AllocatedAt: time.Now().UTC().Truncate(time.Second),
|
||||
}
|
||||
if err := s.Upsert(a); err != nil {
|
||||
t.Fatalf("Upsert: %v", err)
|
||||
}
|
||||
|
||||
got, ok := s.Get("abc")
|
||||
if !ok || got.PodName != "stalwart-0" {
|
||||
t.Fatalf("Get after Upsert: ok=%v got=%+v", ok, got)
|
||||
}
|
||||
|
||||
// Round-trip: a fresh Store reading the same path sees the entry.
|
||||
s2, err := NewStore(path, "host001")
|
||||
if err != nil {
|
||||
t.Fatalf("reopen: %v", err)
|
||||
}
|
||||
if got, ok := s2.Get("abc"); !ok || got.IP6 != a.IP6 {
|
||||
t.Fatalf("reopen Get: ok=%v got=%+v", ok, got)
|
||||
}
|
||||
|
||||
if err := s.Delete("abc"); err != nil {
|
||||
t.Fatalf("Delete: %v", err)
|
||||
}
|
||||
if _, ok := s.Get("abc"); ok {
|
||||
t.Fatalf("entry still present after Delete")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_UpsertReplacesByContainerID(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
must := func(err error) {
|
||||
t.Helper()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::1", State: StatePending}))
|
||||
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::2", State: StateCommitted}))
|
||||
if got := len(s.Snapshot()); got != 1 {
|
||||
t.Fatalf("len = %d, want 1 (Upsert should replace)", got)
|
||||
}
|
||||
if a, _ := s.Get("abc"); a.IP6 != "::2" || a.State != StateCommitted {
|
||||
t.Fatalf("Upsert did not replace: %+v", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_PendingContainerIDs(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
_ = s.Upsert(Allocation{ContainerID: "p1", State: StatePending})
|
||||
_ = s.Upsert(Allocation{ContainerID: "c1", State: StateCommitted})
|
||||
_ = s.Upsert(Allocation{ContainerID: "p2", State: StatePending})
|
||||
|
||||
pend := s.PendingContainerIDs()
|
||||
if len(pend) != 2 {
|
||||
t.Fatalf("PendingContainerIDs len = %d, want 2", len(pend))
|
||||
}
|
||||
have := map[string]bool{pend[0]: true, pend[1]: true}
|
||||
if !have["p1"] || !have["p2"] {
|
||||
t.Fatalf("PendingContainerIDs = %v, want p1,p2", pend)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_RejectsWrongVersion(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "allocations.json")
|
||||
if err := os.WriteFile(path, []byte(`{"version":99,"node":"x","allocations":[]}`), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := NewStore(path, "x"); err == nil {
|
||||
t.Fatalf("expected error on bad version, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_AtomicWriteDurability(t *testing.T) {
|
||||
// We can't simulate a real power-loss in unit tests, but we can verify
|
||||
// that no .tmp file is left behind after a successful flush, and that
|
||||
// the rename target is intact.
|
||||
s, path := newStore(t)
|
||||
if err := s.Upsert(Allocation{ContainerID: "x", State: StateCommitted}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
|
||||
t.Fatalf(".tmp leaked: err=%v", err)
|
||||
}
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil || len(b) == 0 {
|
||||
t.Fatalf("final file unreadable: err=%v len=%d", err, len(b))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
// Package v1alpha1 contains API Schema definitions for the flock.fritzlab.net
|
||||
// v1alpha1 API group.
|
||||
package v1alpha1
|
||||
|
||||
const (
|
||||
GroupName = "flock.fritzlab.net"
|
||||
Version = "v1alpha1"
|
||||
)
|
||||
@@ -0,0 +1,54 @@
|
||||
package v1alpha1
|
||||
|
||||
// NodeConfigSpec is the operator-written desired state for a single node.
|
||||
//
|
||||
// The agent reads this on startup and via informer for live updates. There is
|
||||
// no controller and no auto-allocation — purely declarative input.
|
||||
type NodeConfigSpec struct {
|
||||
// CIDR6 is the set of IPv6 CIDRs this node owns and advertises as BGP
|
||||
// aggregates. Pod IPv6 addresses are allocated from these.
|
||||
CIDR6 []string `json:"cidr6,omitempty"`
|
||||
|
||||
// CIDR4 is the set of IPv4 CIDRs this node owns and advertises as BGP
|
||||
// aggregates. Pod IPv4 addresses are allocated from these.
|
||||
CIDR4 []string `json:"cidr4,omitempty"`
|
||||
|
||||
// BGP configures the BGP sessions this node establishes upstream.
|
||||
BGP BGPSpec `json:"bgp"`
|
||||
}
|
||||
|
||||
type BGPSpec struct {
|
||||
// ASN is this node's local autonomous system number.
|
||||
ASN uint32 `json:"asn"`
|
||||
|
||||
// Peers lists upstream BGP peers (typically the rack/site router).
|
||||
Peers []BGPPeer `json:"peers"`
|
||||
}
|
||||
|
||||
type BGPPeer struct {
|
||||
// Address is the peer's IP (IPv6 or IPv4).
|
||||
Address string `json:"address"`
|
||||
// ASN is the peer's autonomous system number.
|
||||
ASN uint32 `json:"asn"`
|
||||
}
|
||||
|
||||
// NodeConfig is the Schema for the nodeconfigs API.
|
||||
type NodeConfig struct {
|
||||
TypeMeta `json:",inline"`
|
||||
ObjectMeta `json:"metadata,omitempty"`
|
||||
|
||||
Spec NodeConfigSpec `json:"spec,omitempty"`
|
||||
}
|
||||
|
||||
// TypeMeta and ObjectMeta are minimal stand-ins so this package can be used
|
||||
// without dragging in k8s.io/apimachinery during the M1 scaffold. They will be
|
||||
// replaced by metav1.TypeMeta / metav1.ObjectMeta when the agent wires up
|
||||
// controller-runtime in M2.
|
||||
type TypeMeta struct {
|
||||
Kind string `json:"kind,omitempty"`
|
||||
APIVersion string `json:"apiVersion,omitempty"`
|
||||
}
|
||||
|
||||
type ObjectMeta struct {
|
||||
Name string `json:"name,omitempty"`
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Package cni hosts the CNI plugin entry-points. The plugin binary is short-
|
||||
// lived: it is invoked by kubelet, talks to flock-agent over a unix socket,
|
||||
// and exits. All real work happens in the agent.
|
||||
package cni
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/containernetworking/cni/pkg/skel"
|
||||
"github.com/containernetworking/cni/pkg/types"
|
||||
current "github.com/containernetworking/cni/pkg/types/100"
|
||||
)
|
||||
|
||||
// SocketPath is the unix socket exposed by flock-agent.
|
||||
const SocketPath = "/run/flock/flock.sock"
|
||||
|
||||
var errNotImplemented = errors.New("flock: ADD/DEL/CHECK not implemented in M1 scaffold")
|
||||
|
||||
// CmdAdd is invoked by kubelet when a pod sandbox is created.
|
||||
func CmdAdd(args *skel.CmdArgs) error {
|
||||
// M2: dial SocketPath, send ADD RPC, return CNI result.
|
||||
_ = args
|
||||
_ = current.ImplementedSpecVersion
|
||||
return types.NewError(types.ErrInternal, "flock-add", errNotImplemented.Error())
|
||||
}
|
||||
|
||||
// CmdDel is invoked by kubelet when a pod sandbox is torn down.
|
||||
func CmdDel(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-del", errNotImplemented.Error())
|
||||
}
|
||||
|
||||
// CmdCheck verifies that the live netns matches the persisted allocation.
|
||||
func CmdCheck(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-check", errNotImplemented.Error())
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package cni
|
||||
|
||||
// rpc_client.go will hold the JSON-over-unix-socket client used by the CNI
|
||||
// plugin to call into flock-agent. Stub for M1; implementation lands in M2
|
||||
// alongside the agent's RPC server.
|
||||
@@ -0,0 +1,174 @@
|
||||
// Package embed implements ip-algo: deterministic embedding of pod identity
|
||||
// (namespace, pod name, image digest) into the host portion of an IPv6
|
||||
// address. The mapping is operator-friendly cosmetics — NOT a security
|
||||
// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec.
|
||||
package embed
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Field is one of the supported identity fields.
|
||||
type Field string
|
||||
|
||||
const (
|
||||
FieldNamespace Field = "namespace"
|
||||
FieldPod Field = "pod"
|
||||
FieldImage Field = "image"
|
||||
)
|
||||
|
||||
// Values carries the inputs for one embedding call. Image holds the SHA-256
|
||||
// manifest digest as 64 hex chars when known; otherwise pass the containerID
|
||||
// in ImageFallback and we'll FNV-1a-64 it.
|
||||
type Values struct {
|
||||
Namespace string
|
||||
Pod string
|
||||
Image string // 64-char hex sha256 manifest digest, or empty
|
||||
ImageFallback string // typically containerID, used when Image=="".
|
||||
}
|
||||
|
||||
// MaxFieldNibbles is the largest single-field width supported by this
|
||||
// implementation. 16 nibbles = 64 bits = the output width of FNV-1a-64.
|
||||
// Wider fields would require a wider hash; the design doc tolerates this
|
||||
// because real deployments use /64 nodes (15 field nibbles total).
|
||||
const MaxFieldNibbles = 16
|
||||
|
||||
// Embed returns the IPv6 address inside `network` whose host portion encodes
|
||||
// `fields` (in the given order) followed by the random nibble nNibble.
|
||||
//
|
||||
// `network` must be an IPv6 prefix whose length is a multiple of 4 (so the
|
||||
// host portion is a whole number of nibbles).
|
||||
//
|
||||
// `fields` must be non-empty. For a fully-random IID, the caller should pick
|
||||
// random bytes directly rather than calling Embed.
|
||||
//
|
||||
// nNibble is the random "instance" nibble; only the low 4 bits are used.
|
||||
// Callers regenerate it on collision (see allocations.json).
|
||||
func Embed(network *net.IPNet, fields []Field, vals Values, nNibble byte) (net.IP, error) {
|
||||
ones, bits := network.Mask.Size()
|
||||
if bits != 128 {
|
||||
return nil, fmt.Errorf("network is not IPv6: %s", network)
|
||||
}
|
||||
if ones%4 != 0 {
|
||||
return nil, fmt.Errorf("prefix length %d is not a multiple of 4", ones)
|
||||
}
|
||||
hostNibbles := (128 - ones) / 4
|
||||
if hostNibbles < 2 {
|
||||
return nil, fmt.Errorf("prefix /%d leaves %d host nibble(s); need at least 2 (one field + N)", ones, hostNibbles)
|
||||
}
|
||||
if len(fields) == 0 {
|
||||
return nil, fmt.Errorf("no fields specified; caller should generate random IID directly")
|
||||
}
|
||||
|
||||
fieldNibbles := hostNibbles - 1
|
||||
dist, err := distribute(fieldNibbles, len(fields))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
addr := make(net.IP, net.IPv6len)
|
||||
copy(addr, network.IP.To16())
|
||||
|
||||
// Stream nibbles left-to-right starting at the first host nibble.
|
||||
startNibble := ones / 4
|
||||
pos := 0
|
||||
for i, f := range fields {
|
||||
n := dist[i]
|
||||
v, err := fieldValue(f, vals, n*4)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Write `n` nibbles, most-significant first.
|
||||
for j := n - 1; j >= 0; j-- {
|
||||
nb := byte((v >> uint(j*4)) & 0xF)
|
||||
writeNibble(addr, startNibble+pos, nb)
|
||||
pos++
|
||||
}
|
||||
}
|
||||
writeNibble(addr, startNibble+pos, nNibble&0x0F)
|
||||
|
||||
return addr, nil
|
||||
}
|
||||
|
||||
// distribute splits `total` nibbles across `k` fields as evenly as possible,
|
||||
// giving any remainder to earlier fields one extra nibble at a time.
|
||||
func distribute(total, k int) ([]int, error) {
|
||||
if k <= 0 {
|
||||
return nil, fmt.Errorf("k must be > 0")
|
||||
}
|
||||
if total < k {
|
||||
return nil, fmt.Errorf("not enough host nibbles (%d) for %d fields", total, k)
|
||||
}
|
||||
out := make([]int, k)
|
||||
base := total / k
|
||||
rem := total % k
|
||||
for i := range out {
|
||||
out[i] = base
|
||||
if i < rem {
|
||||
out[i]++
|
||||
}
|
||||
if out[i] > MaxFieldNibbles {
|
||||
return nil, fmt.Errorf("field %d would need %d nibbles; max supported is %d", i, out[i], MaxFieldNibbles)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// fieldValue returns the top `bits` bits of the hash-or-digest for `f`,
|
||||
// right-aligned in the returned uint64.
|
||||
func fieldValue(f Field, v Values, bits int) (uint64, error) {
|
||||
if bits <= 0 || bits > 64 {
|
||||
return 0, fmt.Errorf("bad field bits %d (1..64)", bits)
|
||||
}
|
||||
switch f {
|
||||
case FieldNamespace:
|
||||
return topBitsFNV(v.Namespace, bits), nil
|
||||
case FieldPod:
|
||||
return topBitsFNV(v.Pod, bits), nil
|
||||
case FieldImage:
|
||||
if v.Image != "" {
|
||||
return topBitsHex(v.Image, bits)
|
||||
}
|
||||
return topBitsFNV(v.ImageFallback, bits), nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown field %q", f)
|
||||
}
|
||||
}
|
||||
|
||||
func topBitsFNV(s string, bits int) uint64 {
|
||||
h := fnv.New64a()
|
||||
_, _ = h.Write([]byte(s))
|
||||
return h.Sum64() >> uint(64-bits)
|
||||
}
|
||||
|
||||
// topBitsHex parses a leading sha256-digest-style hex string and returns
|
||||
// its top `bits` bits, right-aligned. Accepts an optional "sha256:" prefix.
|
||||
func topBitsHex(s string, bits int) (uint64, error) {
|
||||
s = strings.TrimPrefix(s, "sha256:")
|
||||
if len(s) < 16 { // need at least 8 bytes / 64 bits to right-shift
|
||||
return 0, fmt.Errorf("image digest too short: %d hex chars", len(s))
|
||||
}
|
||||
b, err := hex.DecodeString(s[:16])
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("image digest not hex: %w", err)
|
||||
}
|
||||
var v uint64
|
||||
for _, x := range b {
|
||||
v = (v << 8) | uint64(x)
|
||||
}
|
||||
return v >> uint(64-bits), nil
|
||||
}
|
||||
|
||||
// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0).
|
||||
func writeNibble(addr net.IP, nibIdx int, nb byte) {
|
||||
bytePos := nibIdx / 2
|
||||
if nibIdx%2 == 0 {
|
||||
addr[bytePos] = (addr[bytePos] & 0x0F) | (nb << 4)
|
||||
} else {
|
||||
addr[bytePos] = (addr[bytePos] & 0xF0) | (nb & 0x0F)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
package embed
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func mustCIDR(t *testing.T, s string) *net.IPNet {
|
||||
t.Helper()
|
||||
_, n, err := net.ParseCIDR(s)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseCIDR(%q): %v", s, err)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func TestDistribute(t *testing.T) {
|
||||
cases := []struct {
|
||||
total, k int
|
||||
want []int
|
||||
}{
|
||||
// from the doc table
|
||||
{19, 1, []int{19}}, // /48 1 field — would exceed MaxFieldNibbles, see error test below
|
||||
{19, 2, []int{10, 9}},
|
||||
{19, 3, []int{7, 6, 6}},
|
||||
{17, 1, []int{17}},
|
||||
{17, 2, []int{9, 8}},
|
||||
{17, 3, []int{6, 6, 5}},
|
||||
{15, 1, []int{15}},
|
||||
{15, 2, []int{8, 7}},
|
||||
{15, 3, []int{5, 5, 5}},
|
||||
{11, 1, []int{11}},
|
||||
{11, 2, []int{6, 5}},
|
||||
{11, 3, []int{4, 4, 3}},
|
||||
{7, 1, []int{7}},
|
||||
{7, 2, []int{4, 3}},
|
||||
{7, 3, []int{3, 2, 2}},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got, err := distribute(c.total, c.k)
|
||||
if c.total > MaxFieldNibbles && c.k == 1 {
|
||||
if err == nil {
|
||||
t.Errorf("distribute(%d,%d): expected MaxFieldNibbles error", c.total, c.k)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("distribute(%d,%d): %v", c.total, c.k, err)
|
||||
continue
|
||||
}
|
||||
if !equal(got, c.want) {
|
||||
t.Errorf("distribute(%d,%d) = %v, want %v", c.total, c.k, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func equal(a, b []int) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestEmbed_Slash64Deterministic(t *testing.T) {
|
||||
// /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID.
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
addr, err := Embed(net64,
|
||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
||||
0xe,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed: %v", err)
|
||||
}
|
||||
// Property: same inputs → same output (twice).
|
||||
addr2, err := Embed(net64,
|
||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
||||
0xe,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !addr.Equal(addr2) {
|
||||
t.Fatalf("non-deterministic: %s vs %s", addr, addr2)
|
||||
}
|
||||
// Property: prefix preserved.
|
||||
if !net64.Contains(addr) {
|
||||
t.Fatalf("addr %s outside network %s", addr, net64)
|
||||
}
|
||||
// Property: last nibble is exactly N.
|
||||
if got := addr[len(addr)-1] & 0x0F; got != 0xe {
|
||||
t.Fatalf("last nibble = %x, want e", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0)
|
||||
b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0)
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("different namespace produced identical IID: %s", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_NRandomizesLowNibble(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
a, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x1)
|
||||
b, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x2)
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("changing N did not change address")
|
||||
}
|
||||
// And the only difference should be the last nibble.
|
||||
if a[15]>>4 != b[15]>>4 {
|
||||
t.Fatalf("upper nibble of last byte changed unexpectedly: %x vs %x", a[15], b[15])
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_RejectsBadInputs(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
if _, err := Embed(net64, nil, Values{}, 0); err == nil {
|
||||
t.Fatalf("expected error for empty fields")
|
||||
}
|
||||
odd := &net.IPNet{IP: net.ParseIP("2602:817:3000::"), Mask: net.CIDRMask(63, 128)}
|
||||
if _, err := Embed(odd, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
|
||||
t.Fatalf("expected error for /63 (not nibble-aligned)")
|
||||
}
|
||||
v4 := &net.IPNet{IP: net.ParseIP("10.0.0.0").To4(), Mask: net.CIDRMask(8, 32)}
|
||||
if _, err := Embed(v4, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
|
||||
t.Fatalf("expected error for IPv4 network")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_ImageDigestVsFallback(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
digest := "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011"
|
||||
a, err := Embed(net64, []Field{FieldImage}, Values{Image: digest}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed digest: %v", err)
|
||||
}
|
||||
b, err := Embed(net64, []Field{FieldImage}, Values{ImageFallback: "ctr-xyz"}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed fallback: %v", err)
|
||||
}
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("digest and fallback produced same IID")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user