Files
flock/pkg/agent/server.go
T
Donavan Fritz 31fcae2a97
Build flock Image / build (push) Has been cancelled
M2 plumbing: CNI ↔ agent JSON RPC over unix socket
Locks the wire format between /opt/cni/bin/flock and flock-agent. ADD
returns a CNI Result, DEL returns success/error, CHECK returns
success/error. Connection-per-RPC, newline-delimited JSON.

- pkg/cni/rpc.go: shared Op + Request + Response + framed encode/decode.
- pkg/cni/rpc_client.go: net.Dial + EncodeRequest + DecodeResponse;
  rpcSocket overridable for tests.
- pkg/cni/plugin.go: real implementations of CmdAdd/Del/Check that call
  through, mapping agent errors to types.Error.
- pkg/agent/rpc.go: rpcServer with swappable AddHandler/DelHandler/
  CheckHandler (defaults: not-implemented for ADD; idempotent-no-op for
  DEL/CHECK so kubelet teardown of a never-ADDed pod doesn't fail).
- pkg/agent/server.go: replaces the M1 accept-and-close placeholder
  with rpcServer.serve(ctx, listener); listener closes on ctx cancel.

Tests cover: Request/Response JSON roundtrip, end-to-end client →
unix-socket → fake server, agent error → CNI types.Error mapping.

ADD remains "not implemented" until netlink + IPAM wire-up — the agent
returns an error and kubelet will fail pod sandbox creation IF a node
were configured to use this CNI. host001's CNI plane is still 100%
Calico, so this changes nothing observable on the cluster.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 22:21:33 -05:00

125 lines
3.3 KiB
Go

package agent
import (
"context"
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)
// SocketPath is the unix socket on which flock-agent serves RPCs from the
// CNI plugin, and the default NewServer uses when Config.Socket is empty.
// It mirrors pkg/cni.SocketPath; it is kept as a separate constant so the
// agent package does not have to import the CNI package (avoiding an import
// cycle).
const SocketPath = "/run/flock/flock.sock"
// Server is the agent's runtime container: state store, kubernetes informers,
// netlink, BIRD, nftables. Current state: state store, NodeConfig informer,
// RPC dispatcher with stub ADD/DEL/CHECK handlers (will be replaced when
// netlink + IPAM wire-up lands).
//
// Construct one with NewServer and start it with Run.
type Server struct {
// Node is the node name, copied from Config.Node.
Node string
// Store is the state store opened by NewServer from Config.StatePath.
Store *Store
// NodeConfig is the cache handed to the NodeConfig informer in Run.
NodeConfig *NodeConfigCache
// RPC is the dispatcher that Run serves on the unix socket listener.
RPC *rpcServer
// Logger receives the agent's log output.
Logger *slog.Logger
// socket is the unix socket path Run listens on.
socket string
// restCfg is the Kubernetes client configuration passed to the informer.
restCfg *rest.Config
}
// Config configures NewServer. Only Node is required; NewServer fills in a
// default for every other field that is left at its zero value.
type Config struct {
// Node is required; NewServer returns an error when it is empty.
Node string
// StatePath is the state file passed to NewStore; its parent directory is
// created if missing. Empty => /var/lib/flock/allocations.json.
StatePath string // typically /var/lib/flock/allocations.json
// Socket is the unix socket path the agent listens on. Empty => SocketPath.
Socket string // typically /run/flock/flock.sock
// Logger is used for the agent's logging. Nil => slog.Default().
Logger *slog.Logger
// Kubeconfig is the path to a kubeconfig file.
Kubeconfig string // empty => in-cluster config
}
// NewServer builds a Server from cfg without starting any goroutines; the
// caller starts it with Run.
//
// Node is mandatory. Empty StatePath, Socket, and Logger fall back to
// /var/lib/flock/allocations.json, SocketPath, and slog.Default()
// respectively. The state file's parent directory is created (mode 0750) if
// needed, then the store is opened and the Kubernetes client configuration is
// loaded. Any failure along the way is returned wrapped with the step that
// failed.
func NewServer(cfg Config) (*Server, error) {
	if cfg.Node == "" {
		return nil, fmt.Errorf("Node must be set")
	}

	// Resolve defaults into locals; cfg is a copy, so the caller's value is
	// untouched either way.
	statePath, socket, logger := cfg.StatePath, cfg.Socket, cfg.Logger
	if statePath == "" {
		statePath = "/var/lib/flock/allocations.json"
	}
	if socket == "" {
		socket = SocketPath
	}
	if logger == nil {
		logger = slog.Default()
	}

	// The store needs its parent directory to exist before it is opened.
	stateDir := filepath.Dir(statePath)
	if mkErr := os.MkdirAll(stateDir, 0o750); mkErr != nil {
		return nil, fmt.Errorf("mkdir state dir: %w", mkErr)
	}

	store, storeErr := NewStore(statePath, cfg.Node)
	if storeErr != nil {
		return nil, fmt.Errorf("open store: %w", storeErr)
	}

	kubeCfg, kubeErr := loadRestConfig(cfg.Kubeconfig)
	if kubeErr != nil {
		return nil, fmt.Errorf("load kube config: %w", kubeErr)
	}

	srv := &Server{
		Node:       cfg.Node,
		Store:      store,
		NodeConfig: &NodeConfigCache{},
		RPC:        newRPCServer(logger),
		Logger:     logger,
		socket:     socket,
		restCfg:    kubeCfg,
	}
	return srv, nil
}
// loadRestConfig returns the Kubernetes client configuration for the agent.
// An empty kubeconfig selects the in-cluster configuration; otherwise
// kubeconfig is passed to clientcmd as the kubeconfig path.
func loadRestConfig(kubeconfig string) (*rest.Config, error) {
	if kubeconfig == "" {
		return rest.InClusterConfig()
	}

	// No API server URL override is supplied alongside the kubeconfig.
	const masterURL = ""
	return clientcmd.BuildConfigFromFlags(masterURL, kubeconfig)
}
// Run starts the agent and blocks until ctx is cancelled or the NodeConfig
// informer stops.
//
// It creates the socket directory, removes any file already at the socket
// path, listens on the unix socket, and then starts two goroutines: the RPC
// dispatcher (s.RPC.serve) on that listener and the NodeConfig informer.
// Both receive a context derived from ctx that is cancelled when Run returns,
// so an informer failure also signals the RPC dispatcher to stop.
//
// Run returns nil when ctx is cancelled. It returns an error when the socket
// directory or listener cannot be set up, when the informer returns an error,
// or when the informer returns without an error while ctx is still live.
func (s *Server) Run(ctx context.Context) error {
	if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil {
		return fmt.Errorf("mkdir socket dir: %w", err)
	}

	// Best-effort removal of a socket file left behind by a previous run. The
	// error is deliberately ignored: a missing file is the normal case, and
	// anything that actually prevents binding is reported by net.Listen below.
	_ = os.Remove(s.socket)

	l, err := net.Listen("unix", s.socket)
	if err != nil {
		return fmt.Errorf("listen %s: %w", s.socket, err)
	}
	defer l.Close()

	// Deferred after l.Close so that it runs first: the goroutines below see
	// their context cancelled before the listener is closed under them.
	runCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	s.Logger.Info("flock-agent started",
		"node", s.Node,
		"socket", s.socket,
		"allocations", len(s.Store.Snapshot()),
	)

	// RPC dispatcher takes ownership of the listener.
	go s.RPC.serve(runCtx, l)

	// NodeConfig informer. Any error from the informer terminates Run. The
	// channel is buffered so the goroutine can always deliver its result and
	// exit, even if Run has already returned via ctx.Done.
	errCh := make(chan error, 1)
	go func() {
		errCh <- StartNodeConfigInformer(runCtx, s.restCfg, s.Node, s.NodeConfig, s.Logger)
	}()

	select {
	case <-ctx.Done():
		s.Logger.Info("flock-agent stopping")
		return nil
	case informerErr := <-errCh:
		if informerErr != nil {
			return fmt.Errorf("informer: %w", informerErr)
		}
		// Wrapping a nil error with %w would yield a non-nil error reading
		// "informer: %!w(<nil>)", so the nil case is handled explicitly.
		if ctx.Err() != nil {
			// The informer's clean exit raced with ctx.Done in the select;
			// treat it as the normal shutdown path.
			s.Logger.Info("flock-agent stopping")
			return nil
		}
		return fmt.Errorf("informer: exited unexpectedly without an error")
	}
}