flock M1 scaffold: CNI plugin + agent + NodeConfig CRD
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
- cmd/flock + cmd/flock-agent: build cleanly; CNI ADD/DEL/CHECK return ErrInternal stubs until M2; agent boots, opens unix socket, logs JSON. - pkg/agent/state.go: durable allocations.json (atomic write + fsync + parent fsync); pending/committed lifecycle. Tests cover round-trip, replace-by-cid, version mismatch, no-leak-on-tmp. - pkg/embed/suffix.go: ip-algo IID embedding. Tests cover the /48-/96 nibble distribution table from the design doc, determinism, prefix preservation, N-nibble isolation, digest-vs-fallback divergence. - pkg/api/v1alpha1: minimal NodeConfig types (no controller-runtime yet). - deploy/: NodeConfig CRD, empty ServiceAccount/ClusterRole, DaemonSet pinned to flock.fritzlab.net/agent="" label so it only runs on opted-in nodes. - .gitea/workflows/main.yaml + Dockerfile: build + push to code.fritzlab.net/fritzlab/flock; runs go test in CI. Design doc: dfritzlab/k8s-manager/dfritz-cni.md. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// SocketPath is the unix socket on which flock-agent serves RPCs from the
|
||||
// CNI plugin. Mirrors pkg/cni.SocketPath; kept as a separate constant so the
|
||||
// agent package has no import-cycle on the CNI package.
|
||||
const SocketPath = "/run/flock/flock.sock"
|
||||
|
||||
// Server is the agent's runtime container: state store, kubernetes informers,
|
||||
// netlink, BIRD, nftables. M1 wires only the state store and a placeholder
|
||||
// listener so the binary boots and exits cleanly under a context.
|
||||
type Server struct {
|
||||
Node string
|
||||
Store *Store
|
||||
Logger *slog.Logger
|
||||
socket string
|
||||
closeCh chan struct{}
|
||||
}
|
||||
|
||||
// Config configures NewServer.
|
||||
type Config struct {
|
||||
Node string
|
||||
StatePath string // typically /var/lib/flock/allocations.json
|
||||
Socket string // typically /run/flock/flock.sock
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
// NewServer constructs a Server. It does NOT start any goroutines; call Run.
|
||||
func NewServer(cfg Config) (*Server, error) {
|
||||
if cfg.Node == "" {
|
||||
return nil, fmt.Errorf("Node must be set")
|
||||
}
|
||||
if cfg.StatePath == "" {
|
||||
cfg.StatePath = "/var/lib/flock/allocations.json"
|
||||
}
|
||||
if cfg.Socket == "" {
|
||||
cfg.Socket = SocketPath
|
||||
}
|
||||
if cfg.Logger == nil {
|
||||
cfg.Logger = slog.Default()
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(cfg.StatePath), 0o750); err != nil {
|
||||
return nil, fmt.Errorf("mkdir state dir: %w", err)
|
||||
}
|
||||
store, err := NewStore(cfg.StatePath, cfg.Node)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open store: %w", err)
|
||||
}
|
||||
return &Server{
|
||||
Node: cfg.Node,
|
||||
Store: store,
|
||||
Logger: cfg.Logger,
|
||||
socket: cfg.Socket,
|
||||
closeCh: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the agent and blocks until ctx is cancelled. M1 only opens the
|
||||
// unix listener (proving permissions/path); the RPC handler is a no-op
|
||||
// returning ENOSYS until M2.
|
||||
func (s *Server) Run(ctx context.Context) error {
|
||||
if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil {
|
||||
return fmt.Errorf("mkdir socket dir: %w", err)
|
||||
}
|
||||
_ = os.Remove(s.socket)
|
||||
l, err := net.Listen("unix", s.socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("listen %s: %w", s.socket, err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
s.Logger.Info("flock-agent started",
|
||||
"node", s.Node,
|
||||
"socket", s.socket,
|
||||
"allocations", len(s.Store.Snapshot()),
|
||||
)
|
||||
|
||||
// Accept loop: M1 closes every accepted conn immediately. M2 will dispatch.
|
||||
go func() {
|
||||
for {
|
||||
conn, err := l.Accept()
|
||||
if err != nil {
|
||||
return // listener closed
|
||||
}
|
||||
_ = conn.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
<-ctx.Done()
|
||||
s.Logger.Info("flock-agent stopping")
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
// Package agent owns the in-process flock-agent runtime: IPAM, netns, state,
|
||||
// anycast, and NetworkPolicy. This file implements the durable per-node
|
||||
// allocation file at /var/lib/flock/allocations.json.
|
||||
package agent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AllocationState is the lifecycle marker for an entry in allocations.json.
|
||||
//
|
||||
// pending — IPAM picked addresses; netlink work may be incomplete.
|
||||
// On agent startup these are GC'd: addrs/routes/veth removed.
|
||||
// committed — netlink ops finished; entry is the source of truth.
|
||||
type AllocationState string
|
||||
|
||||
const (
|
||||
StatePending AllocationState = "pending"
|
||||
StateCommitted AllocationState = "committed"
|
||||
)
|
||||
|
||||
// Allocation is a single per-pod entry persisted in allocations.json.
|
||||
type Allocation struct {
|
||||
ContainerID string `json:"container_id"`
|
||||
Namespace string `json:"namespace"`
|
||||
PodName string `json:"pod_name"`
|
||||
OwnerUID string `json:"owner_uid"`
|
||||
IP6 string `json:"ip6,omitempty"`
|
||||
IP4 string `json:"ip4,omitempty"`
|
||||
Anycast []string `json:"anycast,omitempty"`
|
||||
State AllocationState `json:"state"`
|
||||
AllocatedAt time.Time `json:"allocated_at"`
|
||||
}
|
||||
|
||||
// State is the on-disk file shape. Version is bumped on incompatible changes.
|
||||
type State struct {
|
||||
Version int `json:"version"`
|
||||
Node string `json:"node"`
|
||||
Allocations []Allocation `json:"allocations"`
|
||||
}
|
||||
|
||||
const stateVersion = 1
|
||||
|
||||
// Store is the durable allocation store.
|
||||
//
|
||||
// All public methods are safe for concurrent use. They serialize through
|
||||
// a single mutex so that the on-disk file is always consistent and so that
|
||||
// the CNI ADD/DEL critical sections (which mutate kernel state alongside the
|
||||
// file) can rely on snapshot semantics.
|
||||
type Store struct {
|
||||
mu sync.Mutex
|
||||
path string
|
||||
node string
|
||||
data State
|
||||
}
|
||||
|
||||
// NewStore opens (or creates) a per-node store. The directory containing
|
||||
// `path` must already exist; we do not create it because in production it is
|
||||
// /var/lib/flock managed by the DaemonSet, not the agent process.
|
||||
func NewStore(path, node string) (*Store, error) {
|
||||
s := &Store{path: path, node: node}
|
||||
if err := s.load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *Store) load() error {
|
||||
b, err := os.ReadFile(s.path)
|
||||
if os.IsNotExist(err) {
|
||||
s.data = State{Version: stateVersion, Node: s.node, Allocations: []Allocation{}}
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("read state: %w", err)
|
||||
}
|
||||
if err := json.Unmarshal(b, &s.data); err != nil {
|
||||
return fmt.Errorf("parse state: %w", err)
|
||||
}
|
||||
if s.data.Version != stateVersion {
|
||||
return fmt.Errorf("state version %d, want %d", s.data.Version, stateVersion)
|
||||
}
|
||||
if s.data.Allocations == nil {
|
||||
s.data.Allocations = []Allocation{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushLocked writes the in-memory state to disk durably:
|
||||
//
|
||||
// 1. write to <path>.tmp
|
||||
// 2. fsync(tmpfd)
|
||||
// 3. rename to <path>
|
||||
// 4. fsync(parent dir) — required so the rename survives power loss.
|
||||
//
|
||||
// Caller MUST hold s.mu.
|
||||
func (s *Store) flushLocked() error {
|
||||
b, err := json.MarshalIndent(s.data, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal state: %w", err)
|
||||
}
|
||||
|
||||
tmp := s.path + ".tmp"
|
||||
f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open tmp: %w", err)
|
||||
}
|
||||
if _, err := f.Write(b); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("write tmp: %w", err)
|
||||
}
|
||||
if err := f.Sync(); err != nil {
|
||||
f.Close()
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("fsync tmp: %w", err)
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("close tmp: %w", err)
|
||||
}
|
||||
if err := os.Rename(tmp, s.path); err != nil {
|
||||
os.Remove(tmp)
|
||||
return fmt.Errorf("rename: %w", err)
|
||||
}
|
||||
|
||||
dir, err := os.Open(filepath.Dir(s.path))
|
||||
if err != nil {
|
||||
return fmt.Errorf("open parent: %w", err)
|
||||
}
|
||||
defer dir.Close()
|
||||
if err := dir.Sync(); err != nil {
|
||||
return fmt.Errorf("fsync parent: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get returns the entry for containerID, ok=false if absent. Returned value
|
||||
// is a copy; mutations do not affect store state.
|
||||
func (s *Store) Get(containerID string) (Allocation, bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for _, a := range s.data.Allocations {
|
||||
if a.ContainerID == containerID {
|
||||
return a, true
|
||||
}
|
||||
}
|
||||
return Allocation{}, false
|
||||
}
|
||||
|
||||
// Upsert inserts or replaces the entry for a.ContainerID and flushes.
|
||||
func (s *Store) Upsert(a Allocation) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for i, ex := range s.data.Allocations {
|
||||
if ex.ContainerID == a.ContainerID {
|
||||
s.data.Allocations[i] = a
|
||||
return s.flushLocked()
|
||||
}
|
||||
}
|
||||
s.data.Allocations = append(s.data.Allocations, a)
|
||||
return s.flushLocked()
|
||||
}
|
||||
|
||||
// Delete removes the entry for containerID (no-op if absent) and flushes.
|
||||
func (s *Store) Delete(containerID string) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for i, a := range s.data.Allocations {
|
||||
if a.ContainerID == containerID {
|
||||
s.data.Allocations = append(s.data.Allocations[:i], s.data.Allocations[i+1:]...)
|
||||
return s.flushLocked()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Snapshot returns a defensive copy of all allocations.
|
||||
func (s *Store) Snapshot() []Allocation {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
out := make([]Allocation, len(s.data.Allocations))
|
||||
copy(out, s.data.Allocations)
|
||||
return out
|
||||
}
|
||||
|
||||
// PendingContainerIDs returns container IDs whose entries are State==pending.
|
||||
// Used by agent startup GC.
|
||||
func (s *Store) PendingContainerIDs() []string {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
var out []string
|
||||
for _, a := range s.data.Allocations {
|
||||
if a.State == StatePending {
|
||||
out = append(out, a.ContainerID)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func newStore(t *testing.T) (*Store, string) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "allocations.json")
|
||||
s, err := NewStore(path, "host001")
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore: %v", err)
|
||||
}
|
||||
return s, path
|
||||
}
|
||||
|
||||
func TestStore_EmptyOnFirstOpen(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
if got := len(s.Snapshot()); got != 0 {
|
||||
t.Fatalf("Snapshot len = %d, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_UpsertGetDelete(t *testing.T) {
|
||||
s, path := newStore(t)
|
||||
a := Allocation{
|
||||
ContainerID: "abc",
|
||||
Namespace: "mail",
|
||||
PodName: "stalwart-0",
|
||||
OwnerUID: "uid-1",
|
||||
IP6: "2602:817:3000:f001::1",
|
||||
State: StateCommitted,
|
||||
AllocatedAt: time.Now().UTC().Truncate(time.Second),
|
||||
}
|
||||
if err := s.Upsert(a); err != nil {
|
||||
t.Fatalf("Upsert: %v", err)
|
||||
}
|
||||
|
||||
got, ok := s.Get("abc")
|
||||
if !ok || got.PodName != "stalwart-0" {
|
||||
t.Fatalf("Get after Upsert: ok=%v got=%+v", ok, got)
|
||||
}
|
||||
|
||||
// Round-trip: a fresh Store reading the same path sees the entry.
|
||||
s2, err := NewStore(path, "host001")
|
||||
if err != nil {
|
||||
t.Fatalf("reopen: %v", err)
|
||||
}
|
||||
if got, ok := s2.Get("abc"); !ok || got.IP6 != a.IP6 {
|
||||
t.Fatalf("reopen Get: ok=%v got=%+v", ok, got)
|
||||
}
|
||||
|
||||
if err := s.Delete("abc"); err != nil {
|
||||
t.Fatalf("Delete: %v", err)
|
||||
}
|
||||
if _, ok := s.Get("abc"); ok {
|
||||
t.Fatalf("entry still present after Delete")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_UpsertReplacesByContainerID(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
must := func(err error) {
|
||||
t.Helper()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::1", State: StatePending}))
|
||||
must(s.Upsert(Allocation{ContainerID: "abc", IP6: "::2", State: StateCommitted}))
|
||||
if got := len(s.Snapshot()); got != 1 {
|
||||
t.Fatalf("len = %d, want 1 (Upsert should replace)", got)
|
||||
}
|
||||
if a, _ := s.Get("abc"); a.IP6 != "::2" || a.State != StateCommitted {
|
||||
t.Fatalf("Upsert did not replace: %+v", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_PendingContainerIDs(t *testing.T) {
|
||||
s, _ := newStore(t)
|
||||
_ = s.Upsert(Allocation{ContainerID: "p1", State: StatePending})
|
||||
_ = s.Upsert(Allocation{ContainerID: "c1", State: StateCommitted})
|
||||
_ = s.Upsert(Allocation{ContainerID: "p2", State: StatePending})
|
||||
|
||||
pend := s.PendingContainerIDs()
|
||||
if len(pend) != 2 {
|
||||
t.Fatalf("PendingContainerIDs len = %d, want 2", len(pend))
|
||||
}
|
||||
have := map[string]bool{pend[0]: true, pend[1]: true}
|
||||
if !have["p1"] || !have["p2"] {
|
||||
t.Fatalf("PendingContainerIDs = %v, want p1,p2", pend)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_RejectsWrongVersion(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "allocations.json")
|
||||
if err := os.WriteFile(path, []byte(`{"version":99,"node":"x","allocations":[]}`), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := NewStore(path, "x"); err == nil {
|
||||
t.Fatalf("expected error on bad version, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_AtomicWriteDurability(t *testing.T) {
|
||||
// We can't simulate a real power-loss in unit tests, but we can verify
|
||||
// that no .tmp file is left behind after a successful flush, and that
|
||||
// the rename target is intact.
|
||||
s, path := newStore(t)
|
||||
if err := s.Upsert(Allocation{ContainerID: "x", State: StateCommitted}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
|
||||
t.Fatalf(".tmp leaked: err=%v", err)
|
||||
}
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil || len(b) == 0 {
|
||||
t.Fatalf("final file unreadable: err=%v len=%d", err, len(b))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
// Package v1alpha1 contains API Schema definitions for the flock.fritzlab.net
|
||||
// v1alpha1 API group.
|
||||
package v1alpha1
|
||||
|
||||
const (
|
||||
GroupName = "flock.fritzlab.net"
|
||||
Version = "v1alpha1"
|
||||
)
|
||||
@@ -0,0 +1,54 @@
|
||||
package v1alpha1
|
||||
|
||||
// NodeConfigSpec is the operator-written desired state for a single node.
|
||||
//
|
||||
// The agent reads this on startup and via informer for live updates. There is
|
||||
// no controller and no auto-allocation — purely declarative input.
|
||||
type NodeConfigSpec struct {
|
||||
// CIDR6 is the set of IPv6 CIDRs this node owns and advertises as BGP
|
||||
// aggregates. Pod IPv6 addresses are allocated from these.
|
||||
CIDR6 []string `json:"cidr6,omitempty"`
|
||||
|
||||
// CIDR4 is the set of IPv4 CIDRs this node owns and advertises as BGP
|
||||
// aggregates. Pod IPv4 addresses are allocated from these.
|
||||
CIDR4 []string `json:"cidr4,omitempty"`
|
||||
|
||||
// BGP configures the BGP sessions this node establishes upstream.
|
||||
BGP BGPSpec `json:"bgp"`
|
||||
}
|
||||
|
||||
type BGPSpec struct {
|
||||
// ASN is this node's local autonomous system number.
|
||||
ASN uint32 `json:"asn"`
|
||||
|
||||
// Peers lists upstream BGP peers (typically the rack/site router).
|
||||
Peers []BGPPeer `json:"peers"`
|
||||
}
|
||||
|
||||
type BGPPeer struct {
|
||||
// Address is the peer's IP (IPv6 or IPv4).
|
||||
Address string `json:"address"`
|
||||
// ASN is the peer's autonomous system number.
|
||||
ASN uint32 `json:"asn"`
|
||||
}
|
||||
|
||||
// NodeConfig is the Schema for the nodeconfigs API.
|
||||
type NodeConfig struct {
|
||||
TypeMeta `json:",inline"`
|
||||
ObjectMeta `json:"metadata,omitempty"`
|
||||
|
||||
Spec NodeConfigSpec `json:"spec,omitempty"`
|
||||
}
|
||||
|
||||
// TypeMeta and ObjectMeta are minimal stand-ins so this package can be used
|
||||
// without dragging in k8s.io/apimachinery during the M1 scaffold. They will be
|
||||
// replaced by metav1.TypeMeta / metav1.ObjectMeta when the agent wires up
|
||||
// controller-runtime in M2.
|
||||
type TypeMeta struct {
|
||||
Kind string `json:"kind,omitempty"`
|
||||
APIVersion string `json:"apiVersion,omitempty"`
|
||||
}
|
||||
|
||||
type ObjectMeta struct {
|
||||
Name string `json:"name,omitempty"`
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Package cni hosts the CNI plugin entry-points. The plugin binary is short-
|
||||
// lived: it is invoked by kubelet, talks to flock-agent over a unix socket,
|
||||
// and exits. All real work happens in the agent.
|
||||
package cni
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/containernetworking/cni/pkg/skel"
|
||||
"github.com/containernetworking/cni/pkg/types"
|
||||
current "github.com/containernetworking/cni/pkg/types/100"
|
||||
)
|
||||
|
||||
// SocketPath is the unix socket exposed by flock-agent.
|
||||
const SocketPath = "/run/flock/flock.sock"
|
||||
|
||||
var errNotImplemented = errors.New("flock: ADD/DEL/CHECK not implemented in M1 scaffold")
|
||||
|
||||
// CmdAdd is invoked by kubelet when a pod sandbox is created.
|
||||
func CmdAdd(args *skel.CmdArgs) error {
|
||||
// M2: dial SocketPath, send ADD RPC, return CNI result.
|
||||
_ = args
|
||||
_ = current.ImplementedSpecVersion
|
||||
return types.NewError(types.ErrInternal, "flock-add", errNotImplemented.Error())
|
||||
}
|
||||
|
||||
// CmdDel is invoked by kubelet when a pod sandbox is torn down.
|
||||
func CmdDel(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-del", errNotImplemented.Error())
|
||||
}
|
||||
|
||||
// CmdCheck verifies that the live netns matches the persisted allocation.
|
||||
func CmdCheck(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-check", errNotImplemented.Error())
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
package cni
|
||||
|
||||
// rpc_client.go will hold the JSON-over-unix-socket client used by the CNI
|
||||
// plugin to call into flock-agent. Stub for M1; implementation lands in M2
|
||||
// alongside the agent's RPC server.
|
||||
@@ -0,0 +1,174 @@
|
||||
// Package embed implements ip-algo: deterministic embedding of pod identity
|
||||
// (namespace, pod name, image digest) into the host portion of an IPv6
|
||||
// address. The mapping is operator-friendly cosmetics — NOT a security
|
||||
// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec.
|
||||
package embed
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Field is one of the supported identity fields.
|
||||
type Field string
|
||||
|
||||
const (
|
||||
FieldNamespace Field = "namespace"
|
||||
FieldPod Field = "pod"
|
||||
FieldImage Field = "image"
|
||||
)
|
||||
|
||||
// Values carries the inputs for one embedding call. Image holds the SHA-256
|
||||
// manifest digest as 64 hex chars when known; otherwise pass the containerID
|
||||
// in ImageFallback and we'll FNV-1a-64 it.
|
||||
type Values struct {
|
||||
Namespace string
|
||||
Pod string
|
||||
Image string // 64-char hex sha256 manifest digest, or empty
|
||||
ImageFallback string // typically containerID, used when Image=="".
|
||||
}
|
||||
|
||||
// MaxFieldNibbles is the largest single-field width supported by this
|
||||
// implementation. 16 nibbles = 64 bits = the output width of FNV-1a-64.
|
||||
// Wider fields would require a wider hash; the design doc tolerates this
|
||||
// because real deployments use /64 nodes (15 field nibbles total).
|
||||
const MaxFieldNibbles = 16
|
||||
|
||||
// Embed returns the IPv6 address inside `network` whose host portion encodes
|
||||
// `fields` (in the given order) followed by the random nibble nNibble.
|
||||
//
|
||||
// `network` must be an IPv6 prefix whose length is a multiple of 4 (so the
|
||||
// host portion is a whole number of nibbles).
|
||||
//
|
||||
// `fields` must be non-empty. For a fully-random IID, the caller should pick
|
||||
// random bytes directly rather than calling Embed.
|
||||
//
|
||||
// nNibble is the random "instance" nibble; only the low 4 bits are used.
|
||||
// Callers regenerate it on collision (see allocations.json).
|
||||
func Embed(network *net.IPNet, fields []Field, vals Values, nNibble byte) (net.IP, error) {
|
||||
ones, bits := network.Mask.Size()
|
||||
if bits != 128 {
|
||||
return nil, fmt.Errorf("network is not IPv6: %s", network)
|
||||
}
|
||||
if ones%4 != 0 {
|
||||
return nil, fmt.Errorf("prefix length %d is not a multiple of 4", ones)
|
||||
}
|
||||
hostNibbles := (128 - ones) / 4
|
||||
if hostNibbles < 2 {
|
||||
return nil, fmt.Errorf("prefix /%d leaves %d host nibble(s); need at least 2 (one field + N)", ones, hostNibbles)
|
||||
}
|
||||
if len(fields) == 0 {
|
||||
return nil, fmt.Errorf("no fields specified; caller should generate random IID directly")
|
||||
}
|
||||
|
||||
fieldNibbles := hostNibbles - 1
|
||||
dist, err := distribute(fieldNibbles, len(fields))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
addr := make(net.IP, net.IPv6len)
|
||||
copy(addr, network.IP.To16())
|
||||
|
||||
// Stream nibbles left-to-right starting at the first host nibble.
|
||||
startNibble := ones / 4
|
||||
pos := 0
|
||||
for i, f := range fields {
|
||||
n := dist[i]
|
||||
v, err := fieldValue(f, vals, n*4)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Write `n` nibbles, most-significant first.
|
||||
for j := n - 1; j >= 0; j-- {
|
||||
nb := byte((v >> uint(j*4)) & 0xF)
|
||||
writeNibble(addr, startNibble+pos, nb)
|
||||
pos++
|
||||
}
|
||||
}
|
||||
writeNibble(addr, startNibble+pos, nNibble&0x0F)
|
||||
|
||||
return addr, nil
|
||||
}
|
||||
|
||||
// distribute splits `total` nibbles across `k` fields as evenly as possible,
|
||||
// giving any remainder to earlier fields one extra nibble at a time.
|
||||
func distribute(total, k int) ([]int, error) {
|
||||
if k <= 0 {
|
||||
return nil, fmt.Errorf("k must be > 0")
|
||||
}
|
||||
if total < k {
|
||||
return nil, fmt.Errorf("not enough host nibbles (%d) for %d fields", total, k)
|
||||
}
|
||||
out := make([]int, k)
|
||||
base := total / k
|
||||
rem := total % k
|
||||
for i := range out {
|
||||
out[i] = base
|
||||
if i < rem {
|
||||
out[i]++
|
||||
}
|
||||
if out[i] > MaxFieldNibbles {
|
||||
return nil, fmt.Errorf("field %d would need %d nibbles; max supported is %d", i, out[i], MaxFieldNibbles)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// fieldValue returns the top `bits` bits of the hash-or-digest for `f`,
|
||||
// right-aligned in the returned uint64.
|
||||
func fieldValue(f Field, v Values, bits int) (uint64, error) {
|
||||
if bits <= 0 || bits > 64 {
|
||||
return 0, fmt.Errorf("bad field bits %d (1..64)", bits)
|
||||
}
|
||||
switch f {
|
||||
case FieldNamespace:
|
||||
return topBitsFNV(v.Namespace, bits), nil
|
||||
case FieldPod:
|
||||
return topBitsFNV(v.Pod, bits), nil
|
||||
case FieldImage:
|
||||
if v.Image != "" {
|
||||
return topBitsHex(v.Image, bits)
|
||||
}
|
||||
return topBitsFNV(v.ImageFallback, bits), nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown field %q", f)
|
||||
}
|
||||
}
|
||||
|
||||
func topBitsFNV(s string, bits int) uint64 {
|
||||
h := fnv.New64a()
|
||||
_, _ = h.Write([]byte(s))
|
||||
return h.Sum64() >> uint(64-bits)
|
||||
}
|
||||
|
||||
// topBitsHex parses a leading sha256-digest-style hex string and returns
|
||||
// its top `bits` bits, right-aligned. Accepts an optional "sha256:" prefix.
|
||||
func topBitsHex(s string, bits int) (uint64, error) {
|
||||
s = strings.TrimPrefix(s, "sha256:")
|
||||
if len(s) < 16 { // need at least 8 bytes / 64 bits to right-shift
|
||||
return 0, fmt.Errorf("image digest too short: %d hex chars", len(s))
|
||||
}
|
||||
b, err := hex.DecodeString(s[:16])
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("image digest not hex: %w", err)
|
||||
}
|
||||
var v uint64
|
||||
for _, x := range b {
|
||||
v = (v << 8) | uint64(x)
|
||||
}
|
||||
return v >> uint(64-bits), nil
|
||||
}
|
||||
|
||||
// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0).
|
||||
func writeNibble(addr net.IP, nibIdx int, nb byte) {
|
||||
bytePos := nibIdx / 2
|
||||
if nibIdx%2 == 0 {
|
||||
addr[bytePos] = (addr[bytePos] & 0x0F) | (nb << 4)
|
||||
} else {
|
||||
addr[bytePos] = (addr[bytePos] & 0xF0) | (nb & 0x0F)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
package embed
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func mustCIDR(t *testing.T, s string) *net.IPNet {
|
||||
t.Helper()
|
||||
_, n, err := net.ParseCIDR(s)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseCIDR(%q): %v", s, err)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func TestDistribute(t *testing.T) {
|
||||
cases := []struct {
|
||||
total, k int
|
||||
want []int
|
||||
}{
|
||||
// from the doc table
|
||||
{19, 1, []int{19}}, // /48 1 field — would exceed MaxFieldNibbles, see error test below
|
||||
{19, 2, []int{10, 9}},
|
||||
{19, 3, []int{7, 6, 6}},
|
||||
{17, 1, []int{17}},
|
||||
{17, 2, []int{9, 8}},
|
||||
{17, 3, []int{6, 6, 5}},
|
||||
{15, 1, []int{15}},
|
||||
{15, 2, []int{8, 7}},
|
||||
{15, 3, []int{5, 5, 5}},
|
||||
{11, 1, []int{11}},
|
||||
{11, 2, []int{6, 5}},
|
||||
{11, 3, []int{4, 4, 3}},
|
||||
{7, 1, []int{7}},
|
||||
{7, 2, []int{4, 3}},
|
||||
{7, 3, []int{3, 2, 2}},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got, err := distribute(c.total, c.k)
|
||||
if c.total > MaxFieldNibbles && c.k == 1 {
|
||||
if err == nil {
|
||||
t.Errorf("distribute(%d,%d): expected MaxFieldNibbles error", c.total, c.k)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("distribute(%d,%d): %v", c.total, c.k, err)
|
||||
continue
|
||||
}
|
||||
if !equal(got, c.want) {
|
||||
t.Errorf("distribute(%d,%d) = %v, want %v", c.total, c.k, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func equal(a, b []int) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestEmbed_Slash64Deterministic(t *testing.T) {
|
||||
// /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID.
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
addr, err := Embed(net64,
|
||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
||||
0xe,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed: %v", err)
|
||||
}
|
||||
// Property: same inputs → same output (twice).
|
||||
addr2, err := Embed(net64,
|
||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
||||
0xe,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !addr.Equal(addr2) {
|
||||
t.Fatalf("non-deterministic: %s vs %s", addr, addr2)
|
||||
}
|
||||
// Property: prefix preserved.
|
||||
if !net64.Contains(addr) {
|
||||
t.Fatalf("addr %s outside network %s", addr, net64)
|
||||
}
|
||||
// Property: last nibble is exactly N.
|
||||
if got := addr[len(addr)-1] & 0x0F; got != 0xe {
|
||||
t.Fatalf("last nibble = %x, want e", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0)
|
||||
b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0)
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("different namespace produced identical IID: %s", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_NRandomizesLowNibble(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
a, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x1)
|
||||
b, _ := Embed(net64, []Field{FieldNamespace}, Values{Namespace: "x"}, 0x2)
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("changing N did not change address")
|
||||
}
|
||||
// And the only difference should be the last nibble.
|
||||
if a[15]>>4 != b[15]>>4 {
|
||||
t.Fatalf("upper nibble of last byte changed unexpectedly: %x vs %x", a[15], b[15])
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_RejectsBadInputs(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
if _, err := Embed(net64, nil, Values{}, 0); err == nil {
|
||||
t.Fatalf("expected error for empty fields")
|
||||
}
|
||||
odd := &net.IPNet{IP: net.ParseIP("2602:817:3000::"), Mask: net.CIDRMask(63, 128)}
|
||||
if _, err := Embed(odd, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
|
||||
t.Fatalf("expected error for /63 (not nibble-aligned)")
|
||||
}
|
||||
v4 := &net.IPNet{IP: net.ParseIP("10.0.0.0").To4(), Mask: net.CIDRMask(8, 32)}
|
||||
if _, err := Embed(v4, []Field{FieldNamespace}, Values{Namespace: "x"}, 0); err == nil {
|
||||
t.Fatalf("expected error for IPv4 network")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmbed_ImageDigestVsFallback(t *testing.T) {
|
||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||
digest := "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011"
|
||||
a, err := Embed(net64, []Field{FieldImage}, Values{Image: digest}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed digest: %v", err)
|
||||
}
|
||||
b, err := Embed(net64, []Field{FieldImage}, Values{ImageFallback: "ctr-xyz"}, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Embed fallback: %v", err)
|
||||
}
|
||||
if a.Equal(b) {
|
||||
t.Fatalf("digest and fallback produced same IID")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user