2026-04-24 21:17:42 -05:00
|
|
|
package agent
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"fmt"
|
|
|
|
|
"log/slog"
|
|
|
|
|
"net"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
2026-04-24 22:33:48 -05:00
|
|
|
"time"
|
2026-04-24 22:00:48 -05:00
|
|
|
|
|
|
|
|
"k8s.io/client-go/rest"
|
|
|
|
|
"k8s.io/client-go/tools/clientcmd"
|
2026-04-24 21:17:42 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// SocketPath is the unix socket on which flock-agent serves RPCs from the
|
2026-04-24 22:33:48 -05:00
|
|
|
// CNI plugin.
|
2026-04-24 21:17:42 -05:00
|
|
|
const SocketPath = "/run/flock/flock.sock" // NewServer falls back to this when Config.Socket is empty.
|
|
|
|
|
|
2026-04-24 22:33:48 -05:00
|
|
|
// Server orchestrates the agent runtime: store, informers, IPAM, netns,
|
|
|
|
|
// BIRD. Run() blocks until ctx is cancelled.
|
2026-04-24 21:17:42 -05:00
|
|
|
type Server struct {
|
2026-04-24 22:00:48 -05:00
|
|
|
Node string
|
|
|
|
|
Store *Store
|
|
|
|
|
NodeConfig *NodeConfigCache
|
2026-04-24 22:21:33 -05:00
|
|
|
RPC *rpcServer
|
2026-04-24 22:00:48 -05:00
|
|
|
Logger *slog.Logger
|
|
|
|
|
socket string
|
|
|
|
|
restCfg *rest.Config
|
2026-04-24 21:17:42 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Config carries the settings NewServer needs to build a Server. Only Node
// is mandatory; every other field has a default that NewServer fills in.
type Config struct {
	// Node is the name of the node the agent runs on. NewServer rejects an
	// empty value.
	Node string
	// StatePath is the file backing the allocation store. Empty means
	// "/var/lib/flock/allocations.json".
	StatePath string
	// Socket is the unix socket path to listen on. Empty means SocketPath.
	Socket string
	// Logger is the destination for log output. Nil means slog.Default().
	Logger *slog.Logger
	// Kubeconfig is the path of a kubeconfig file. Empty means the
	// in-cluster configuration is used instead.
	Kubeconfig string
}
|
|
|
|
|
|
|
|
|
|
func NewServer(cfg Config) (*Server, error) {
|
|
|
|
|
if cfg.Node == "" {
|
|
|
|
|
return nil, fmt.Errorf("Node must be set")
|
|
|
|
|
}
|
|
|
|
|
if cfg.StatePath == "" {
|
|
|
|
|
cfg.StatePath = "/var/lib/flock/allocations.json"
|
|
|
|
|
}
|
|
|
|
|
if cfg.Socket == "" {
|
|
|
|
|
cfg.Socket = SocketPath
|
|
|
|
|
}
|
|
|
|
|
if cfg.Logger == nil {
|
|
|
|
|
cfg.Logger = slog.Default()
|
|
|
|
|
}
|
|
|
|
|
if err := os.MkdirAll(filepath.Dir(cfg.StatePath), 0o750); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("mkdir state dir: %w", err)
|
|
|
|
|
}
|
|
|
|
|
store, err := NewStore(cfg.StatePath, cfg.Node)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("open store: %w", err)
|
|
|
|
|
}
|
2026-04-24 22:00:48 -05:00
|
|
|
|
|
|
|
|
restCfg, err := loadRestConfig(cfg.Kubeconfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("load kube config: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-24 21:17:42 -05:00
|
|
|
return &Server{
|
2026-04-24 22:00:48 -05:00
|
|
|
Node: cfg.Node,
|
|
|
|
|
Store: store,
|
|
|
|
|
NodeConfig: &NodeConfigCache{},
|
2026-04-24 22:21:33 -05:00
|
|
|
RPC: newRPCServer(cfg.Logger),
|
2026-04-24 22:00:48 -05:00
|
|
|
Logger: cfg.Logger,
|
|
|
|
|
socket: cfg.Socket,
|
|
|
|
|
restCfg: restCfg,
|
2026-04-24 21:17:42 -05:00
|
|
|
}, nil
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-24 22:00:48 -05:00
|
|
|
func loadRestConfig(kubeconfig string) (*rest.Config, error) {
|
|
|
|
|
if kubeconfig != "" {
|
|
|
|
|
return clientcmd.BuildConfigFromFlags("", kubeconfig)
|
|
|
|
|
}
|
|
|
|
|
return rest.InClusterConfig()
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-24 22:33:48 -05:00
|
|
|
// Run blocks until ctx is cancelled.
|
2026-04-24 21:17:42 -05:00
|
|
|
func (s *Server) Run(ctx context.Context) error {
|
|
|
|
|
if err := os.MkdirAll(filepath.Dir(s.socket), 0o750); err != nil {
|
|
|
|
|
return fmt.Errorf("mkdir socket dir: %w", err)
|
|
|
|
|
}
|
|
|
|
|
_ = os.Remove(s.socket)
|
|
|
|
|
l, err := net.Listen("unix", s.socket)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("listen %s: %w", s.socket, err)
|
|
|
|
|
}
|
|
|
|
|
defer l.Close()
|
|
|
|
|
|
|
|
|
|
s.Logger.Info("flock-agent started",
|
|
|
|
|
"node", s.Node,
|
|
|
|
|
"socket", s.socket,
|
|
|
|
|
"allocations", len(s.Store.Snapshot()),
|
|
|
|
|
)
|
|
|
|
|
|
2026-04-24 22:21:33 -05:00
|
|
|
// RPC dispatcher takes ownership of the listener.
|
|
|
|
|
go s.RPC.serve(ctx, l)
|
2026-04-24 21:17:42 -05:00
|
|
|
|
2026-04-24 22:33:48 -05:00
|
|
|
// NodeConfig informer.
|
2026-04-24 22:00:48 -05:00
|
|
|
errCh := make(chan error, 1)
|
|
|
|
|
go func() {
|
|
|
|
|
errCh <- StartNodeConfigInformer(ctx, s.restCfg, s.Node, s.NodeConfig, s.Logger)
|
|
|
|
|
}()
|
|
|
|
|
|
2026-04-24 22:33:48 -05:00
|
|
|
// Pod informer + Handlers + Bird are wired up by configureRuntime,
|
|
|
|
|
// which is platform-specific (real on Linux, no-op stub elsewhere).
|
|
|
|
|
go func() {
|
|
|
|
|
if err := s.configureRuntime(ctx); err != nil {
|
|
|
|
|
s.Logger.Error("runtime configure failed; ADD will return errors", "err", err)
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
2026-04-24 22:00:48 -05:00
|
|
|
select {
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
s.Logger.Info("flock-agent stopping")
|
|
|
|
|
return nil
|
|
|
|
|
case err := <-errCh:
|
|
|
|
|
return fmt.Errorf("informer: %w", err)
|
|
|
|
|
}
|
2026-04-24 21:17:42 -05:00
|
|
|
}
|
2026-04-24 22:33:48 -05:00
|
|
|
|
|
|
|
|
// firstAvailableNodeConfig polls the cache up to `timeout`. Used to wait
|
|
|
|
|
// for the operator-applied NodeConfig CR before booting the IPAM.
|
|
|
|
|
func (s *Server) firstAvailableNodeConfig(ctx context.Context, timeout time.Duration) error {
|
|
|
|
|
deadline := time.Now().Add(timeout)
|
|
|
|
|
for {
|
|
|
|
|
if s.NodeConfig.Load() != nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
if time.Now().After(deadline) {
|
|
|
|
|
return fmt.Errorf("NodeConfig %q not observed within %s", s.Node, timeout)
|
|
|
|
|
}
|
|
|
|
|
select {
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
return ctx.Err()
|
|
|
|
|
case <-time.After(200 * time.Millisecond):
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|