flock M1 scaffold: CNI plugin + agent + NodeConfig CRD
Build flock Image / build (push) Has been cancelled

- cmd/flock + cmd/flock-agent: build cleanly; CNI ADD/DEL/CHECK return
  ErrInternal stubs until M2; agent boots, opens unix socket, logs JSON.
- pkg/agent/state.go: durable allocations.json (atomic write + fsync +
  parent fsync); pending/committed lifecycle. Tests cover round-trip,
  replace-by-cid, version mismatch, no-leak-on-tmp.
- pkg/embed/suffix.go: ip-algo IID embedding. Tests cover the /48-/96
  nibble distribution table from the design doc, determinism, prefix
  preservation, N-nibble isolation, digest-vs-fallback divergence.
- pkg/api/v1alpha1: minimal NodeConfig types (no controller-runtime yet).
- deploy/: NodeConfig CRD, empty ServiceAccount/ClusterRole, DaemonSet
  pinned to flock.fritzlab.net/agent="" label so it only runs on opted-in
  nodes.
- .gitea/workflows/main.yaml + Dockerfile: build + push to
  code.fritzlab.net/fritzlab/flock; runs go test in CI.

Design doc: dfritzlab/k8s-manager/dfritz-cni.md.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Donavan Fritz
2026-04-24 21:17:42 -05:00
commit 20f47916af
22 changed files with 1460 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
.git
.gitea
deploy
hack
test
README.md
LICENSE
*.md
+60
View File
@@ -0,0 +1,60 @@
name: Build flock Image
on:
push:
branches: [main]
jobs:
build:
runs-on: fritzlab
steps:
- name: Check out repo
uses: actions/checkout@v4
- name: Run unit tests
run: |
docker run --rm -v "$PWD:/src" -w /src golang:1.26-alpine \
sh -c "go test ./..."
- name: Log in to Gitea registry
uses: docker/login-action@v3
with:
registry: code.fritzlab.net
username: ci-bot
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: code.fritzlab.net/fritzlab/flock
tags: |
type=raw,value=latest
type=raw,value=${{ github.run_number }}
- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
push: true
provenance: false
build-args: |
GIT_SHA=${{ github.sha }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
network: host
- name: Smoke-test image
run: |
docker run --rm code.fritzlab.net/fritzlab/flock:${{ github.run_number }} --help || true
docker run --rm --entrypoint /usr/local/bin/flock \
code.fritzlab.net/fritzlab/flock:${{ github.run_number }} || true
- name: Clean up old image tags
run: |
tea login add --name ci --url https://code.fritzlab.net --token '${{ secrets.CI_BOT_TOKEN }}' --no-version-check
tea api '/packages/fritzlab?type=container' \
| jq -r '.[] | select(.name=="flock") | select(.version | test("^[0-9]+$")) | .version' \
| sort -n | head -n -3 \
| while read tag; do
echo "deleting flock:$tag"
tea api -X DELETE "/packages/fritzlab/container/flock/$tag"
done
+22
View File
@@ -0,0 +1,22 @@
FROM golang:1.26-alpine AS build
WORKDIR /src
COPY go.mod go.sum ./
RUN go mod download
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
ARG GIT_SHA=unknown
RUN CGO_ENABLED=0 go build -trimpath \
-ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \
-o /out/flock ./cmd/flock \
&& CGO_ENABLED=0 go build -trimpath \
-ldflags="-s -w -X main.gitSHA=${GIT_SHA}" \
-o /out/flock-agent ./cmd/flock-agent
FROM alpine:3.21
RUN apk add --no-cache iproute2 ca-certificates
COPY --from=build /out/flock /usr/local/bin/flock
COPY --from=build /out/flock-agent /usr/local/bin/flock-agent
ENTRYPOINT ["/usr/local/bin/flock-agent"]
+15
View File
@@ -0,0 +1,15 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+22
View File
@@ -0,0 +1,22 @@
# flock
Kubernetes CNI for sjc001. Per-pod IPv4 opt-in, IID embedding, Ready-gated anycast via BGP.
Design doc: `k8s-manager/dfritz-cni.md` (in the operator's k8s-manager repo).
Status: M1 scaffold. Not functional. See milestones table in the design doc.
## Layout
- `cmd/flock` — CNI plugin binary (kubelet-invoked)
- `cmd/flock-agent` — DaemonSet binary
- `pkg/api/v1alpha1``NodeConfig` CRD types
- `pkg/cni` — CNI plugin internals + RPC client
- `pkg/agent` — agent server, IPAM, state file, anycast, NetworkPolicy
- `pkg/embed``ip-algo` IID embedding (pure)
- `pkg/routing/{bird,ospf}` — routing backends
- `deploy/` — CRDs, RBAC, DaemonSet manifests
## License
Apache 2.0.
+50
View File
@@ -0,0 +1,50 @@
// Command flock-agent is the per-node DaemonSet binary. It owns IPAM, netns
// programming, BIRD config, and nftables. M1 boots only the state store and a
// placeholder unix listener.
package main
import (
"context"
"flag"
"log/slog"
"os"
"os/signal"
"syscall"
"code.fritzlab.net/fritzlab/flock/pkg/agent"
)
func main() {
var (
node = flag.String("node", os.Getenv("NODE_NAME"), "node name (defaults to $NODE_NAME)")
statePath = flag.String("state", "/var/lib/flock/allocations.json", "path to allocations.json")
socket = flag.String("socket", agent.SocketPath, "unix socket for CNI RPC")
)
flag.Parse()
logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
if *node == "" {
logger.Error("--node or $NODE_NAME is required")
os.Exit(2)
}
srv, err := agent.NewServer(agent.Config{
Node: *node,
StatePath: *statePath,
Socket: *socket,
Logger: logger,
})
if err != nil {
logger.Error("init server", "err", err)
os.Exit(1)
}
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer cancel()
if err := srv.Run(ctx); err != nil {
logger.Error("run", "err", err)
os.Exit(1)
}
}
+19
View File
@@ -0,0 +1,19 @@
// Command flock is the kubelet-invoked CNI plugin binary. It is short-lived:
// kubelet execs it for ADD/DEL/CHECK, the binary forwards to flock-agent over
// a unix socket, and exits.
package main
import (
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/version"
"code.fritzlab.net/fritzlab/flock/pkg/cni"
)
func main() {
skel.PluginMainFuncs(skel.CNIFuncs{
Add: cni.CmdAdd,
Del: cni.CmdDel,
Check: cni.CmdCheck,
}, version.All, "flock")
}
@@ -0,0 +1,72 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: nodeconfigs.flock.fritzlab.net
spec:
group: flock.fritzlab.net
scope: Cluster
names:
kind: NodeConfig
listKind: NodeConfigList
singular: nodeconfig
plural: nodeconfigs
shortNames:
- fnc
versions:
- name: v1alpha1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
required: [spec]
properties:
spec:
type: object
required: [bgp]
properties:
cidr6:
type: array
items:
type: string
description: IPv6 CIDR owned and aggregate-advertised by this node.
cidr4:
type: array
items:
type: string
description: IPv4 CIDR owned and aggregate-advertised by this node.
bgp:
type: object
required: [asn, peers]
properties:
asn:
type: integer
format: int64
minimum: 1
maximum: 4294967295
description: This node's local ASN.
peers:
type: array
minItems: 1
items:
type: object
required: [address, asn]
properties:
address:
type: string
description: Peer IP (IPv6 or IPv4).
asn:
type: integer
format: int64
minimum: 1
maximum: 4294967295
additionalPrinterColumns:
- name: ASN
type: integer
jsonPath: .spec.bgp.asn
- name: CIDR6
type: string
jsonPath: .spec.cidr6
- name: CIDR4
type: string
jsonPath: .spec.cidr4
+84
View File
@@ -0,0 +1,84 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: flock-agent
namespace: kube-system
labels:
app: flock-agent
spec:
selector:
matchLabels:
app: flock-agent
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
app: flock-agent
spec:
serviceAccountName: flock-agent
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
priorityClassName: system-node-critical
# M1: opt-in per-node via this label. Remove the nodeSelector when
# ready to roll flock to all nodes.
nodeSelector:
flock.fritzlab.net/agent: ""
# Tolerate the cni-test taint applied during migration.
tolerations:
- key: fritzlab.net/cni-test
operator: Equal
value: "true"
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoExecute
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoExecute
containers:
- name: flock-agent
image: code.fritzlab.net/fritzlab/flock:latest
imagePullPolicy: Always
args:
- --node=$(NODE_NAME)
- --state=/var/lib/flock/allocations.json
- --socket=/run/flock/flock.sock
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
# M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink.
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop: ["ALL"]
volumeMounts:
- name: lib-flock
mountPath: /var/lib/flock
- name: run-flock
mountPath: /run/flock
resources:
requests:
cpu: 25m
memory: 32Mi
limits:
memory: 128Mi
volumes:
- name: lib-flock
hostPath:
path: /var/lib/flock
type: DirectoryOrCreate
- name: run-flock
hostPath:
path: /run/flock
type: DirectoryOrCreate
imagePullSecrets:
- name: code-fritzlab-net
+184
View File
@@ -0,0 +1,184 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: nodeconfigs.flock.fritzlab.net
spec:
group: flock.fritzlab.net
scope: Cluster
names:
kind: NodeConfig
listKind: NodeConfigList
singular: nodeconfig
plural: nodeconfigs
shortNames:
- fnc
versions:
- name: v1alpha1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
required: [spec]
properties:
spec:
type: object
required: [bgp]
properties:
cidr6:
type: array
items:
type: string
description: IPv6 CIDR owned and aggregate-advertised by this node.
cidr4:
type: array
items:
type: string
description: IPv4 CIDR owned and aggregate-advertised by this node.
bgp:
type: object
required: [asn, peers]
properties:
asn:
type: integer
format: int64
minimum: 1
maximum: 4294967295
description: This node's local ASN.
peers:
type: array
minItems: 1
items:
type: object
required: [address, asn]
properties:
address:
type: string
description: Peer IP (IPv6 or IPv4).
asn:
type: integer
format: int64
minimum: 1
maximum: 4294967295
additionalPrinterColumns:
- name: ASN
type: integer
jsonPath: .spec.bgp.asn
- name: CIDR6
type: string
jsonPath: .spec.cidr6
- name: CIDR4
type: string
jsonPath: .spec.cidr4
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: flock-agent
namespace: kube-system
---
# M1 RBAC: empty. The agent does not yet read any Kubernetes objects.
# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: flock-agent
rules: []
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: flock-agent
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: flock-agent
subjects:
- kind: ServiceAccount
name: flock-agent
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: flock-agent
namespace: kube-system
labels:
app: flock-agent
spec:
selector:
matchLabels:
app: flock-agent
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
app: flock-agent
spec:
serviceAccountName: flock-agent
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
priorityClassName: system-node-critical
# M1: opt-in per-node via this label. Remove the nodeSelector when
# ready to roll flock to all nodes.
nodeSelector:
flock.fritzlab.net/agent: ""
# Tolerate the cni-test taint applied during migration.
tolerations:
- key: fritzlab.net/cni-test
operator: Equal
value: "true"
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoExecute
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoExecute
containers:
- name: flock-agent
image: code.fritzlab.net/fritzlab/flock:latest
imagePullPolicy: Always
args:
- --node=$(NODE_NAME)
- --state=/var/lib/flock/allocations.json
- --socket=/run/flock/flock.sock
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
# M1: no privileged caps. M2 adds NET_ADMIN/NET_RAW for netlink.
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop: ["ALL"]
volumeMounts:
- name: lib-flock
mountPath: /var/lib/flock
- name: run-flock
mountPath: /run/flock
resources:
requests:
cpu: 25m
memory: 32Mi
limits:
memory: 128Mi
volumes:
- name: lib-flock
hostPath:
path: /var/lib/flock
type: DirectoryOrCreate
- name: run-flock
hostPath:
path: /run/flock
type: DirectoryOrCreate
imagePullSecrets:
- name: code-fritzlab-net
+26
View File
@@ -0,0 +1,26 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: flock-agent
namespace: kube-system
---
# M1 RBAC: empty. The agent does not yet read any Kubernetes objects.
# M2+ will add Pod, NetworkPolicy, and NodeConfig permissions here.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: flock-agent
rules: []
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: flock-agent
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: flock-agent
subjects:
- kind: ServiceAccount
name: flock-agent
namespace: kube-system
+10
View File
@@ -0,0 +1,10 @@
module code.fritzlab.net/fritzlab/flock
go 1.26.1
require github.com/containernetworking/cni v1.3.0
require (
github.com/vishvananda/netns v0.0.4 // indirect
golang.org/x/sys v0.23.0 // indirect
)
+28
View File
@@ -0,0 +1,28 @@
github.com/containernetworking/cni v1.3.0 h1:v6EpN8RznAZj9765HhXQrtXgX+ECGebEYEmnuFjskwo=
github.com/containernetworking/cni v1.3.0/go.mod h1:Bs8glZjjFfGPHMw6hQu82RUgEPNGEaBb9KS5KtNMnJ4=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k=
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo=
github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+100
View File
@@ -0,0 +1,100 @@
package agent
import (
"context"
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
)
// SocketPath is the unix socket on which flock-agent serves RPCs from the
// CNI plugin. Mirrors pkg/cni.SocketPath; kept as a separate constant so the
// agent package has no import-cycle on the CNI package.
const SocketPath = "/run/flock/flock.sock"
// Server is the agent's runtime container: state store, kubernetes informers,
// netlink, BIRD, nftables. M1 wires only the state store and a placeholder
// listener so the binary boots and exits cleanly under a context.
type Server struct {
Node string
Store *Store
Logger *slog.Logger
socket string
closeCh chan struct{}
}
// Config configures NewServer.
type Config struct {
Node string
StatePath string // typically /var/lib/flock/allocations.json
Socket string // typically /run/flock/flock.sock
Logger *slog.Logger
}
// NewServer constructs a Server. It does NOT start any goroutines; call Run.
func NewServer(cfg Config) (*Server, error) {
if cfg.Node == "" {
return nil, fmt.Errorf("Node must be set")
}
if cfg.StatePath == "" {
cfg.StatePath = "/var/lib/flock/allocations.json"
}
if cfg.Socket == "" {
cfg.Socket = SocketPath
}
if cfg.Logger == nil {
cfg.Logger = slog.Default()
}
if err := os.MkdirAll(filepath.Dir(cfg.StatePath), 0o750); err != nil {
return nil, fmt.Errorf("mkdir state dir: %w", err)
}
store, err := NewStore(cfg.StatePath, cfg.Node)
if err != nil {
return nil, fmt.Errorf("open store: %w", err)
}
return &Server{
Node: cfg.Node,
Store: store,
Logger: cfg.Logger,
socket: cfg.Socket,
closeCh: make(chan struct{}),
}, nil
}
// Run starts the agent and blocks until ctx is cancelled. M1 only opens the
// unix listener (proving permissions/path); the RPC handler is a no-op
// returning ENOSYS until M2.
func (s *Server) Run(ctx context.Context) error {
if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil {
return fmt.Errorf("mkdir socket dir: %w", err)
}
_ = os.Remove(s.socket)
l, err := net.Listen("unix", s.socket)
if err != nil {
return fmt.Errorf("listen %s: %w", s.socket, err)
}
defer l.Close()
s.Logger.Info("flock-agent started",
"node", s.Node,
"socket", s.socket,
"allocations", len(s.Store.Snapshot()),
)
// Accept loop: M1 closes every accepted conn immediately. M2 will dispatch.
go func() {
for {
conn, err := l.Accept()
if err != nil {
return // listener closed
}
_ = conn.Close()
}
}()
<-ctx.Done()
s.Logger.Info("flock-agent stopping")
return nil
}
+204
View File
@@ -0,0 +1,204 @@
// Package agent owns the in-process flock-agent runtime: IPAM, netns, state,
// anycast, and NetworkPolicy. This file implements the durable per-node
// allocation file at /var/lib/flock/allocations.json.
package agent
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
)
// AllocationState is the lifecycle marker for an entry in allocations.json.
//
// pending — IPAM picked addresses; netlink work may be incomplete.
// On agent startup these are GC'd: addrs/routes/veth removed.
// committed — netlink ops finished; entry is the source of truth.
type AllocationState string
const (
StatePending AllocationState = "pending"
StateCommitted AllocationState = "committed"
)
// Allocation is a single per-pod entry persisted in allocations.json.
type Allocation struct {
ContainerID string `json:"container_id"`
Namespace string `json:"namespace"`
PodName string `json:"pod_name"`
OwnerUID string `json:"owner_uid"`
IP6 string `json:"ip6,omitempty"`
IP4 string `json:"ip4,omitempty"`
Anycast []string `json:"anycast,omitempty"`
State AllocationState `json:"state"`
AllocatedAt time.Time `json:"allocated_at"`
}
// State is the on-disk file shape. Version is bumped on incompatible changes.
type State struct {
Version int `json:"version"`
Node string `json:"node"`
Allocations []Allocation `json:"allocations"`
}
const stateVersion = 1
// Store is the durable allocation store.
//
// All public methods are safe for concurrent use. They serialize through
// a single mutex so that the on-disk file is always consistent and so that
// the CNI ADD/DEL critical sections (which mutate kernel state alongside the
// file) can rely on snapshot semantics.
type Store struct {
mu sync.Mutex
path string
node string
data State
}
// NewStore opens (or creates) a per-node store. The directory containing
// `path` must already exist; we do not create it because in production it is
// /var/lib/flock managed by the DaemonSet, not the agent process.
func NewStore(path, node string) (*Store, error) {
s := &Store{path: path, node: node}
if err := s.load(); err != nil {
return nil, err
}
return s, nil
}
func (s *Store) load() error {
b, err := os.ReadFile(s.path)
if os.IsNotExist(err) {
s.data = State{Version: stateVersion, Node: s.node, Allocations: []Allocation{}}
return nil
}
if err != nil {
return fmt.Errorf("read state: %w", err)
}
if err := json.Unmarshal(b, &s.data); err != nil {
return fmt.Errorf("parse state: %w", err)
}
if s.data.Version != stateVersion {
return fmt.Errorf("state version %d, want %d", s.data.Version, stateVersion)
}
if s.data.Allocations == nil {
s.data.Allocations = []Allocation{}
}
return nil
}
// flushLocked writes the in-memory state to disk durably:
//
// 1. write to <path>.tmp
// 2. fsync(tmpfd)
// 3. rename to <path>
// 4. fsync(parent dir) — required so the rename survives power loss.
//
// Caller MUST hold s.mu.
func (s *Store) flushLocked() error {
b, err := json.MarshalIndent(s.data, "", " ")
if err != nil {
return fmt.Errorf("marshal state: %w", err)
}
tmp := s.path + ".tmp"
f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
if err != nil {
return fmt.Errorf("open tmp: %w", err)
}
if _, err := f.Write(b); err != nil {
f.Close()
os.Remove(tmp)
return fmt.Errorf("write tmp: %w", err)
}
if err := f.Sync(); err != nil {
f.Close()
os.Remove(tmp)
return fmt.Errorf("fsync tmp: %w", err)
}
if err := f.Close(); err != nil {
os.Remove(tmp)
return fmt.Errorf("close tmp: %w", err)
}
if err := os.Rename(tmp, s.path); err != nil {
os.Remove(tmp)
return fmt.Errorf("rename: %w", err)
}
dir, err := os.Open(filepath.Dir(s.path))
if err != nil {
return fmt.Errorf("open parent: %w", err)
}
defer dir.Close()
if err := dir.Sync(); err != nil {
return fmt.Errorf("fsync parent: %w", err)
}
return nil
}
// Get returns the entry for containerID, ok=false if absent. Returned value
// is a copy; mutations do not affect store state.
func (s *Store) Get(containerID string) (Allocation, bool) {
s.mu.Lock()
defer s.mu.Unlock()
for _, a := range s.data.Allocations {
if a.ContainerID == containerID {
return a, true
}
}
return Allocation{}, false
}
// Upsert inserts or replaces the entry for a.ContainerID and flushes.
func (s *Store) Upsert(a Allocation) error {
s.mu.Lock()
defer s.mu.Unlock()
for i, ex := range s.data.Allocations {
if ex.ContainerID == a.ContainerID {
s.data.Allocations[i] = a
return s.flushLocked()
}
}
s.data.Allocations = append(s.data.Allocations, a)
return s.flushLocked()
}
// Delete removes the entry for containerID (no-op if absent) and flushes.
func (s *Store) Delete(containerID string) error {
s.mu.Lock()
defer s.mu.Unlock()
for i, a := range s.data.Allocations {
if a.ContainerID == containerID {
s.data.Allocations = append(s.data.Allocations[:i], s.data.Allocations[i+1:]...)
return s.flushLocked()
}
}
return nil
}
// Snapshot returns a defensive copy of all allocations.
func (s *Store) Snapshot() []Allocation {
s.mu.Lock()
defer s.mu.Unlock()
out := make([]Allocation, len(s.data.Allocations))
copy(out, s.data.Allocations)
return out
}
// PendingContainerIDs returns container IDs whose entries are State==pending.
// Used by agent startup GC.
func (s *Store) PendingContainerIDs() []string {
s.mu.Lock()
defer s.mu.Unlock()
var out []string
for _, a := range s.data.Allocations {
if a.State == StatePending {
out = append(out, a.ContainerID)
}
}
return out
}
+125
View File
@@ -0,0 +1,125 @@
package agent
import (
"os"
"path/filepath"
"testing"
"time"
)
func newStore(t *testing.T) (*Store, string) {
t.Helper()
dir := t.TempDir()
path := filepath.Join(dir, "allocations.json")
s, err := NewStore(path, "host001")
if err != nil {
t.Fatalf("NewStore: %v", err)
}
return s, path
}
func TestStore_EmptyOnFirstOpen(t *testing.T) {
s, _ := newStore(t)
if got := len(s.Snapshot()); got != 0 {
t.Fatalf("Snapshot len = %d, want 0", got)
}
}
func TestStore_UpsertGetDelete(t *testing.T) {
s, path := newStore(t)
a := Allocation{
ContainerID: "abc",
Namespace: "mail",
PodName: "stalwart-0",
OwnerUID: "uid-1",
IP6: "2602:817:3000:f001::1",
State: StateCommitted,
AllocatedAt: time.Now().UTC().Truncate(time.Second),
}
if err := s.Upsert(a); err != nil {
t.Fatalf("Upsert: %v", err)
}
got, ok := s.Get("abc")
if !ok || got.PodName != "stalwart-0" {
t.Fatalf("Get after Upsert: ok=%v got=%+v", ok, got)
}
// Round-trip: a fresh Store reading the same path sees the entry.
s2, err := NewStore(path, "host001")
if err != nil {
t.Fatalf("reopen: %v", err)
}
if got, ok := s2.Get("abc"); !ok || got.IP6 != a.IP6 {
t.Fatalf("reopen Get: ok=%v got=%+v", ok, got)
}
if err := s.Delete("abc"); err != nil {
t.Fatalf("Delete: %v", err)
}
if _, ok := s.Get("abc"); ok {
t.Fatalf("entry still present after Delete")
}
}
func TestStore_UpsertReplacesByContainerID(t *testing.T) {
s, _ := newStore(t)
must := func(err error) {
t.Helper()
if err != nil {
t.Fatal(err)
}
}
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::1", State: StatePending}))
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::2", State: StateCommitted}))
if got := len(s.Snapshot()); got != 1 {
t.Fatalf("len = %d, want 1 (Upsert should replace)", got)
}
if a, _ := s.Get("abc"); a.IP6 != "::2" || a.State != StateCommitted {
t.Fatalf("Upsert did not replace: %+v", a)
}
}
func TestStore_PendingContainerIDs(t *testing.T) {
s, _ := newStore(t)
_ = s.Upsert(Allocation{ContainerID: "p1", State: StatePending})
_ = s.Upsert(Allocation{ContainerID: "c1", State: StateCommitted})
_ = s.Upsert(Allocation{ContainerID: "p2", State: StatePending})
pend := s.PendingContainerIDs()
if len(pend) != 2 {
t.Fatalf("PendingContainerIDs len = %d, want 2", len(pend))
}
have := map[string]bool{pend[0]: true, pend[1]: true}
if !have["p1"] || !have["p2"] {
t.Fatalf("PendingContainerIDs = %v, want p1,p2", pend)
}
}
func TestStore_RejectsWrongVersion(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "allocations.json")
if err := os.WriteFile(path, []byte(`{"version":99,"node":"x","allocations":[]}`), 0o600); err != nil {
t.Fatal(err)
}
if _, err := NewStore(path, "x"); err == nil {
t.Fatalf("expected error on bad version, got nil")
}
}
func TestStore_AtomicWriteDurability(t *testing.T) {
// We can't simulate a real power-loss in unit tests, but we can verify
// that no .tmp file is left behind after a successful flush, and that
// the rename target is intact.
s, path := newStore(t)
if err := s.Upsert(Allocation{ContainerID: "x", State: StateCommitted}); err != nil {
t.Fatal(err)
}
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
t.Fatalf(".tmp leaked: err=%v", err)
}
b, err := os.ReadFile(path)
if err != nil || len(b) == 0 {
t.Fatalf("final file unreadable: err=%v len=%d", err, len(b))
}
}
+8
View File
@@ -0,0 +1,8 @@
// Package v1alpha1 contains API Schema definitions for the flock.fritzlab.net
// v1alpha1 API group.
package v1alpha1
const (
GroupName = "flock.fritzlab.net"
Version = "v1alpha1"
)
+54
View File
@@ -0,0 +1,54 @@
package v1alpha1
// NodeConfigSpec is the operator-written desired state for a single node.
//
// The agent reads this on startup and via informer for live updates. There is
// no controller and no auto-allocation — purely declarative input.
type NodeConfigSpec struct {
// CIDR6 is the set of IPv6 CIDRs this node owns and advertises as BGP
// aggregates. Pod IPv6 addresses are allocated from these.
CIDR6 []string `json:"cidr6,omitempty"`
// CIDR4 is the set of IPv4 CIDRs this node owns and advertises as BGP
// aggregates. Pod IPv4 addresses are allocated from these.
CIDR4 []string `json:"cidr4,omitempty"`
// BGP configures the BGP sessions this node establishes upstream.
BGP BGPSpec `json:"bgp"`
}
type BGPSpec struct {
// ASN is this node's local autonomous system number.
ASN uint32 `json:"asn"`
// Peers lists upstream BGP peers (typically the rack/site router).
Peers []BGPPeer `json:"peers"`
}
type BGPPeer struct {
// Address is the peer's IP (IPv6 or IPv4).
Address string `json:"address"`
// ASN is the peer's autonomous system number.
ASN uint32 `json:"asn"`
}
// NodeConfig is the Schema for the nodeconfigs API.
type NodeConfig struct {
TypeMeta `json:",inline"`
ObjectMeta `json:"metadata,omitempty"`
Spec NodeConfigSpec `json:"spec,omitempty"`
}
// TypeMeta and ObjectMeta are minimal stand-ins so this package can be used
// without dragging in k8s.io/apimachinery during the M1 scaffold. They will be
// replaced by metav1.TypeMeta / metav1.ObjectMeta when the agent wires up
// controller-runtime in M2.
type TypeMeta struct {
Kind string `json:"kind,omitempty"`
APIVersion string `json:"apiVersion,omitempty"`
}
type ObjectMeta struct {
Name string `json:"name,omitempty"`
}
+37
View File
@@ -0,0 +1,37 @@
// Package cni hosts the CNI plugin entry-points. The plugin binary is short-
// lived: it is invoked by kubelet, talks to flock-agent over a unix socket,
// and exits. All real work happens in the agent.
package cni
import (
"errors"
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
current "github.com/containernetworking/cni/pkg/types/100"
)
// SocketPath is the unix socket exposed by flock-agent.
const SocketPath = "/run/flock/flock.sock"
var errNotImplemented = errors.New("flock: ADD/DEL/CHECK not implemented in M1 scaffold")
// CmdAdd is invoked by kubelet when a pod sandbox is created.
func CmdAdd(args *skel.CmdArgs) error {
// M2: dial SocketPath, send ADD RPC, return CNI result.
_ = args
_ = current.ImplementedSpecVersion
return types.NewError(types.ErrInternal, "flock-add", errNotImplemented.Error())
}
// CmdDel is invoked by kubelet when a pod sandbox is torn down.
func CmdDel(args *skel.CmdArgs) error {
_ = args
return types.NewError(types.ErrInternal, "flock-del", errNotImplemented.Error())
}
// CmdCheck verifies that the live netns matches the persisted allocation.
func CmdCheck(args *skel.CmdArgs) error {
_ = args
return types.NewError(types.ErrInternal, "flock-check", errNotImplemented.Error())
}
+5
View File
@@ -0,0 +1,5 @@
package cni
// rpc_client.go will hold the JSON-over-unix-socket client used by the CNI
// plugin to call into flock-agent. Stub for M1; implementation lands in M2
// alongside the agent's RPC server.
+174
View File
@@ -0,0 +1,174 @@
// Package embed implements ip-algo: deterministic embedding of pod identity
// (namespace, pod name, image digest) into the host portion of an IPv6
// address. The mapping is operator-friendly cosmetics — NOT a security
// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec.
package embed
import (
"encoding/hex"
"fmt"
"hash/fnv"
"net"
"strings"
)
// Field is one of the supported identity fields.
type Field string
const (
FieldNamespace Field = "namespace"
FieldPod Field = "pod"
FieldImage Field = "image"
)
// Values carries the inputs for one embedding call. Image holds the SHA-256
// manifest digest as 64 hex chars when known; otherwise pass the containerID
// in ImageFallback and we'll FNV-1a-64 it.
type Values struct {
Namespace string
Pod string
Image string // 64-char hex sha256 manifest digest, or empty
ImageFallback string // typically containerID, used when Image=="".
}
// MaxFieldNibbles is the largest single-field width supported by this
// implementation. 16 nibbles = 64 bits = the output width of FNV-1a-64.
// Wider fields would require a wider hash; the design doc tolerates this
// because real deployments use /64 nodes (15 field nibbles total).
const MaxFieldNibbles = 16
// Embed returns the IPv6 address inside `network` whose host portion encodes
// `fields` (in the given order) followed by the random nibble nNibble.
//
// `network` must be an IPv6 prefix whose length is a multiple of 4 (so the
// host portion is a whole number of nibbles).
//
// `fields` must be non-empty. For a fully-random IID, the caller should pick
// random bytes directly rather than calling Embed.
//
// nNibble is the random "instance" nibble; only the low 4 bits are used.
// Callers regenerate it on collision (see allocations.json).
func Embed(network *net.IPNet, fields []Field, vals Values, nNibble byte) (net.IP, error) {
ones, bits := network.Mask.Size()
if bits != 128 {
return nil, fmt.Errorf("network is not IPv6: %s", network)
}
if ones%4 != 0 {
return nil, fmt.Errorf("prefix length %d is not a multiple of 4", ones)
}
hostNibbles := (128 - ones) / 4
if hostNibbles < 2 {
return nil, fmt.Errorf("prefix /%d leaves %d host nibble(s); need at least 2 (one field + N)", ones, hostNibbles)
}
if len(fields) == 0 {
return nil, fmt.Errorf("no fields specified; caller should generate random IID directly")
}
fieldNibbles := hostNibbles - 1
dist, err := distribute(fieldNibbles, len(fields))
if err != nil {
return nil, err
}
addr := make(net.IP, net.IPv6len)
copy(addr, network.IP.To16())
// Stream nibbles left-to-right starting at the first host nibble.
startNibble := ones / 4
pos := 0
for i, f := range fields {
n := dist[i]
v, err := fieldValue(f, vals, n*4)
if err != nil {
return nil, err
}
// Write `n` nibbles, most-significant first.
for j := n - 1; j >= 0; j-- {
nb := byte((v >> uint(j*4)) & 0xF)
writeNibble(addr, startNibble+pos, nb)
pos++
}
}
writeNibble(addr, startNibble+pos, nNibble&0x0F)
return addr, nil
}
// distribute splits `total` nibbles across `k` fields as evenly as possible,
// giving any remainder to earlier fields one extra nibble at a time.
func distribute(total, k int) ([]int, error) {
if k <= 0 {
return nil, fmt.Errorf("k must be > 0")
}
if total < k {
return nil, fmt.Errorf("not enough host nibbles (%d) for %d fields", total, k)
}
out := make([]int, k)
base := total / k
rem := total % k
for i := range out {
out[i] = base
if i < rem {
out[i]++
}
if out[i] > MaxFieldNibbles {
return nil, fmt.Errorf("field %d would need %d nibbles; max supported is %d", i, out[i], MaxFieldNibbles)
}
}
return out, nil
}
// fieldValue returns the top `bits` bits of the hash-or-digest for `f`,
// right-aligned in the returned uint64.
func fieldValue(f Field, v Values, bits int) (uint64, error) {
if bits <= 0 || bits > 64 {
return 0, fmt.Errorf("bad field bits %d (1..64)", bits)
}
switch f {
case FieldNamespace:
return topBitsFNV(v.Namespace, bits), nil
case FieldPod:
return topBitsFNV(v.Pod, bits), nil
case FieldImage:
if v.Image != "" {
return topBitsHex(v.Image, bits)
}
return topBitsFNV(v.ImageFallback, bits), nil
default:
return 0, fmt.Errorf("unknown field %q", f)
}
}
func topBitsFNV(s string, bits int) uint64 {
h := fnv.New64a()
_, _ = h.Write([]byte(s))
return h.Sum64() >> uint(64-bits)
}
// topBitsHex parses a leading sha256-digest-style hex string and returns
// its top `bits` bits, right-aligned. Accepts an optional "sha256:" prefix.
func topBitsHex(s string, bits int) (uint64, error) {
s = strings.TrimPrefix(s, "sha256:")
if len(s) < 16 { // need at least 8 bytes / 64 bits to right-shift
return 0, fmt.Errorf("image digest too short: %d hex chars", len(s))
}
b, err := hex.DecodeString(s[:16])
if err != nil {
return 0, fmt.Errorf("image digest not hex: %w", err)
}
var v uint64
for _, x := range b {
v = (v << 8) | uint64(x)
}
return v >> uint(64-bits), nil
}
// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0).
func writeNibble(addr net.IP, nibIdx int, nb byte) {
bytePos := nibIdx / 2
if nibIdx%2 == 0 {
addr[bytePos] = (addr[bytePos] & 0x0F) | (nb << 4)
} else {
addr[bytePos] = (addr[bytePos] & 0xF0) | (nb & 0x0F)
}
}
+153
View File
@@ -0,0 +1,153 @@
package embed
import (
"net"
"testing"
)
func mustCIDR(t *testing.T, s string) *net.IPNet {
t.Helper()
_, n, err := net.ParseCIDR(s)
if err != nil {
t.Fatalf("ParseCIDR(%q): %v", s, err)
}
return n
}
func TestDistribute(t *testing.T) {
cases := []struct {
total, k int
want []int
}{
// from the doc table
{19, 1, []int{19}}, // /48 1 field — would exceed MaxFieldNibbles, see error test below
{19, 2, []int{10, 9}},
{19, 3, []int{7, 6, 6}},
{17, 1, []int{17}},
{17, 2, []int{9, 8}},
{17, 3, []int{6, 6, 5}},
{15, 1, []int{15}},
{15, 2, []int{8, 7}},
{15, 3, []int{5, 5, 5}},
{11, 1, []int{11}},
{11, 2, []int{6, 5}},
{11, 3, []int{4, 4, 3}},
{7, 1, []int{7}},
{7, 2, []int{4, 3}},
{7, 3, []int{3, 2, 2}},
}
for _, c := range cases {
got, err := distribute(c.total, c.k)
if c.total > MaxFieldNibbles && c.k == 1 {
if err == nil {
t.Errorf("distribute(%d,%d): expected MaxFieldNibbles error", c.total, c.k)
}
continue
}
if err != nil {
t.Errorf("distribute(%d,%d): %v", c.total, c.k, err)
continue
}
if !equal(got, c.want) {
t.Errorf("distribute(%d,%d) = %v, want %v", c.total, c.k, got, c.want)
}
}
}
func equal(a, b []int) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
func TestEmbed_Slash64Deterministic(t *testing.T) {
// /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID.
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
addr, err := Embed(net64,
[]Field{FieldNamespace, FieldPod, FieldImage},
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
0xe,
)
if err != nil {
t.Fatalf("Embed: %v", err)
}
// Property: same inputs → same output (twice).
addr2, err := Embed(net64,
[]Field{FieldNamespace, FieldPod, FieldImage},
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
0xe,
)
if err != nil {
t.Fatal(err)
}
if !addr.Equal(addr2) {
t.Fatalf("non-deterministic: %s vs %s", addr, addr2)
}
// Property: prefix preserved.
if !net64.Contains(addr) {
t.Fatalf("addr %s outside network %s", addr, net64)
}
// Property: last nibble is exactly N.
if got := addr[len(addr)-1] & 0x0F; got != 0xe {
t.Fatalf("last nibble = %x, want e", got)
}
}
func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) {
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0)
b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0)
if a.Equal(b) {
t.Fatalf("different namespace produced identical IID: %s", a)
}
}
func TestEmbed_NRandomizesLowNibble(t *testing.T) {
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
a, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x1)
b, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x2)
if a.Equal(b) {
t.Fatalf("changing N did not change address")
}
// And the only difference should be the last nibble.
if a[15]>>4 != b[15]>>4 {
t.Fatalf("upper nibble of last byte changed unexpectedly: %x vs %x", a[15], b[15])
}
}
func TestEmbed_RejectsBadInputs(t *testing.T) {
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
if _, err := Embed(net64, nil, Values{}, 0); err == nil {
t.Fatalf("expected error for empty fields")
}
odd := &net.IPNet{IP: net.ParseIP("2602:817:3000::"), Mask: net.CIDRMask(63, 128)}
if _, err := Embed(odd, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
t.Fatalf("expected error for /63 (not nibble-aligned)")
}
v4 := &net.IPNet{IP: net.ParseIP("10.0.0.0").To4(), Mask: net.CIDRMask(8, 32)}
if _, err := Embed(v4, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
t.Fatalf("expected error for IPv4 network")
}
}
func TestEmbed_ImageDigestVsFallback(t *testing.T) {
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
digest := "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011"
a, err := Embed(net64, []Field{FieldImage}, Values{Image: digest}, 0)
if err != nil {
t.Fatalf("Embed digest: %v", err)
}
b, err := Embed(net64, []Field{FieldImage}, Values{ImageFallback: "ctr-xyz"}, 0)
if err != nil {
t.Fatalf("Embed fallback: %v", err)
}
if a.Equal(b) {
t.Fatalf("digest and fallback produced same IID")
}
}