M2 plumbing: CNI ↔ agent JSON RPC over unix socket
Build flock Image / build (push) Has been cancelled
Build flock Image / build (push) Has been cancelled
Locks the wire format between /opt/cni/bin/flock and flock-agent. ADD returns a CNI Result, DEL returns success/error, CHECK returns success/error. Connection-per-RPC, newline-delimited JSON.

- pkg/cni/rpc.go: shared Op + Request + Response + framed encode/decode.
- pkg/cni/rpc_client.go: net.Dial + EncodeRequest + DecodeResponse; rpcSocket overridable for tests.
- pkg/cni/plugin.go: real implementations of CmdAdd/Del/Check that call through, mapping agent errors to types.Error.
- pkg/agent/rpc.go: rpcServer with swappable AddHandler/DelHandler/CheckHandler (defaults: not-implemented for ADD; idempotent no-op for DEL/CHECK so kubelet teardown of a never-ADDed pod doesn't fail).
- pkg/agent/server.go: replaces the M1 accept-and-close placeholder with rpcServer.serve(ctx, listener); listener closes on ctx cancel.

Tests cover: Request/Response JSON roundtrip, end-to-end client → unix-socket → fake server, agent error → CNI types.Error mapping.

ADD remains "not implemented" until netlink + IPAM wire-up — the agent returns an error and kubelet will fail pod sandbox creation IF a node were configured to use this CNI. host001's CNI plane is still 100% Calico, so this changes nothing observable on the cluster.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+25
-13
@@ -4,8 +4,6 @@
|
||||
package cni
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/containernetworking/cni/pkg/skel"
|
||||
"github.com/containernetworking/cni/pkg/types"
|
||||
current "github.com/containernetworking/cni/pkg/types/100"
|
||||
@@ -14,24 +12,38 @@ import (
|
||||
// SocketPath is the unix socket exposed by flock-agent.
|
||||
const SocketPath = "/run/flock/flock.sock"
|
||||
|
||||
var errNotImplemented = errors.New("flock: ADD/DEL/CHECK not implemented in M1 scaffold")
|
||||
|
||||
// CmdAdd is invoked by kubelet when a pod sandbox is created.
|
||||
func CmdAdd(args *skel.CmdArgs) error {
|
||||
// M2: dial SocketPath, send ADD RPC, return CNI result.
|
||||
_ = args
|
||||
_ = current.ImplementedSpecVersion
|
||||
return types.NewError(types.ErrInternal, "flock-add", errNotImplemented.Error())
|
||||
resp, err := call(fromArgs(OpAdd, args))
|
||||
if err != nil {
|
||||
return types.NewError(types.ErrInternal, "flock-add", err.Error())
|
||||
}
|
||||
if cerr := toCNIError("add", resp); cerr != nil {
|
||||
return cerr
|
||||
}
|
||||
if resp.Result == nil {
|
||||
return types.NewError(types.ErrInternal, "flock-add", "agent returned no result")
|
||||
}
|
||||
return types.PrintResult(resp.Result, current.ImplementedSpecVersion)
|
||||
}
|
||||
|
||||
// CmdDel is invoked by kubelet when a pod sandbox is torn down.
|
||||
// CmdDel is invoked by kubelet when a pod sandbox is torn down. CNI spec:
|
||||
// DEL must be idempotent. The agent treats a missing allocation as success.
|
||||
func CmdDel(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-del", errNotImplemented.Error())
|
||||
resp, err := call(fromArgs(OpDel, args))
|
||||
if err != nil {
|
||||
// On dial failure during DEL, fail loudly — kubelet retries DEL,
|
||||
// and the next attempt may succeed once the agent is reachable.
|
||||
return types.NewError(types.ErrInternal, "flock-del", err.Error())
|
||||
}
|
||||
return toCNIError("del", resp)
|
||||
}
|
||||
|
||||
// CmdCheck verifies that the live netns matches the persisted allocation.
|
||||
func CmdCheck(args *skel.CmdArgs) error {
|
||||
_ = args
|
||||
return types.NewError(types.ErrInternal, "flock-check", errNotImplemented.Error())
|
||||
resp, err := call(fromArgs(OpCheck, args))
|
||||
if err != nil {
|
||||
return types.NewError(types.ErrInternal, "flock-check", err.Error())
|
||||
}
|
||||
return toCNIError("check", resp)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
package cni
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
current "github.com/containernetworking/cni/pkg/types/100"
|
||||
)
|
||||
|
||||
// Op names the CNI verb that the plugin asks the agent to perform.
type Op string

// The verbs the plugin can forward to the agent.
const (
	OpAdd   = Op("ADD")
	OpDel   = Op("DEL")
	OpCheck = Op("CHECK")
)
|
||||
|
||||
// Request is sent over the unix socket from the CNI plugin to flock-agent.
|
||||
// Field names mirror the kubelet → CNI invocation env vars; the agent uses
|
||||
// these to look up Pod metadata via the informer cache.
|
||||
type Request struct {
|
||||
Op Op `json:"op"`
|
||||
ContainerID string `json:"container_id"`
|
||||
Netns string `json:"netns"` // /proc/<pid>/ns/net
|
||||
IfName string `json:"ifname"` // typically "eth0"
|
||||
Args string `json:"args"` // raw CNI_ARGS env (K=V;K=V;...)
|
||||
Path string `json:"path"` // CNI_PATH (plugin search path)
|
||||
StdinData []byte `json:"stdin_data"` // raw network configuration JSON
|
||||
}
|
||||
|
||||
// Response carries either a typed CNI Result or a single error string.
|
||||
// We use a string error (not a Go error) because errors traverse a JSON
|
||||
// boundary; the client converts back to a CNI types.Error.
|
||||
type Response struct {
|
||||
Result *current.Result `json:"result,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// EncodeRequest writes req to w as a single JSON object followed by '\n'.
|
||||
// The newline framing makes the wire-protocol simple to read incrementally
|
||||
// without a length prefix or full-stream-buffering.
|
||||
func EncodeRequest(w io.Writer, req Request) error {
|
||||
b, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal request: %w", err)
|
||||
}
|
||||
b = append(b, '\n')
|
||||
if _, err := w.Write(b); err != nil {
|
||||
return fmt.Errorf("write request: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DecodeRequest reads one newline-delimited JSON request from r.
|
||||
func DecodeRequest(r io.Reader) (Request, error) {
|
||||
var req Request
|
||||
dec := json.NewDecoder(r)
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return Request{}, fmt.Errorf("decode request: %w", err)
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// EncodeResponse writes resp to w as a single JSON object followed by '\n'.
|
||||
func EncodeResponse(w io.Writer, resp Response) error {
|
||||
b, err := json.Marshal(resp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal response: %w", err)
|
||||
}
|
||||
b = append(b, '\n')
|
||||
if _, err := w.Write(b); err != nil {
|
||||
return fmt.Errorf("write response: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DecodeResponse reads one newline-delimited JSON response from r.
|
||||
func DecodeResponse(r io.Reader) (Response, error) {
|
||||
var resp Response
|
||||
dec := json.NewDecoder(r)
|
||||
if err := dec.Decode(&resp); err != nil {
|
||||
return Response{}, fmt.Errorf("decode response: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
+57
-3
@@ -1,5 +1,59 @@
|
||||
package cni
|
||||
|
||||
// rpc_client.go will hold the JSON-over-unix-socket client used by the CNI
|
||||
// plugin to call into flock-agent. Stub for M1; implementation lands in M2
|
||||
// alongside the agent's RPC server.
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"github.com/containernetworking/cni/pkg/skel"
|
||||
"github.com/containernetworking/cni/pkg/types"
|
||||
)
|
||||
|
||||
// dialTimeout bounds how long the plugin waits to connect to the agent
|
||||
// socket. kubelet has its own outer timeout for the whole CNI invocation,
|
||||
// but a tighter bound here gives a clearer error if the DaemonSet pod is
|
||||
// gone or starting up.
|
||||
const dialTimeout = 5 * time.Second
|
||||
|
||||
// rpcSocket is overridable for tests.
|
||||
var rpcSocket = SocketPath
|
||||
|
||||
// call issues one Request and returns the Response. It dials the agent
|
||||
// unix socket, encodes the request, and decodes a single response. The
|
||||
// connection is closed before returning.
|
||||
func call(req Request) (*Response, error) {
|
||||
conn, err := net.DialTimeout("unix", rpcSocket, dialTimeout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dial flock-agent at %s: %w", rpcSocket, err)
|
||||
}
|
||||
defer conn.Close()
|
||||
if err := EncodeRequest(conn, req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := DecodeResponse(conn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
// fromArgs builds a Request from a CNI skel.CmdArgs invocation.
|
||||
func fromArgs(op Op, args *skel.CmdArgs) Request {
|
||||
return Request{
|
||||
Op: op,
|
||||
ContainerID: args.ContainerID,
|
||||
Netns: args.Netns,
|
||||
IfName: args.IfName,
|
||||
Args: args.Args,
|
||||
Path: args.Path,
|
||||
StdinData: args.StdinData,
|
||||
}
|
||||
}
|
||||
|
||||
// toCNIError converts an RPC Response.Error into a CNI types.Error, or nil.
|
||||
func toCNIError(stage string, resp *Response) error {
|
||||
if resp.Error == "" {
|
||||
return nil
|
||||
}
|
||||
return types.NewError(types.ErrInternal, "flock-"+stage, resp.Error)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
package cni
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
current "github.com/containernetworking/cni/pkg/types/100"
|
||||
)
|
||||
|
||||
func TestEncodeDecode_RequestRoundtrip(t *testing.T) {
|
||||
req := Request{
|
||||
Op: OpAdd,
|
||||
ContainerID: "abc",
|
||||
Netns: "/proc/1234/ns/net",
|
||||
IfName: "eth0",
|
||||
Args: "K8S_POD_NAMESPACE=mail;K8S_POD_NAME=stalwart-0",
|
||||
Path: "/opt/cni/bin",
|
||||
StdinData: []byte(`{"cniVersion":"1.0.0","name":"flock"}`),
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
if err := EncodeRequest(&buf, req); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got, err := DecodeRequest(&buf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got.Op != req.Op || got.ContainerID != req.ContainerID || string(got.StdinData) != string(req.StdinData) {
|
||||
t.Fatalf("roundtrip mismatch:\n got=%+v\nwant=%+v", got, req)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeDecode_ResponseRoundtrip(t *testing.T) {
|
||||
resp := Response{
|
||||
Result: ¤t.Result{CNIVersion: current.ImplementedSpecVersion},
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
if err := EncodeResponse(&buf, resp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got, err := DecodeResponse(&buf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got.Result == nil || got.Result.CNIVersion != current.ImplementedSpecVersion {
|
||||
t.Fatalf("response roundtrip lost CNIVersion: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRPC_ClientToFakeServer wires the real client to a tiny in-process
|
||||
// server over a unix socket, exercising end-to-end framing.
|
||||
func TestRPC_ClientToFakeServer(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
sockPath := filepath.Join(dir, "flock.sock")
|
||||
|
||||
l, err := net.Listen("unix", sockPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
// Server: read one Request, write one Response.
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
conn, err := l.Accept()
|
||||
if err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
req, err := DecodeRequest(conn)
|
||||
if err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
var resp Response
|
||||
switch req.Op {
|
||||
case OpAdd:
|
||||
resp.Result = ¤t.Result{CNIVersion: current.ImplementedSpecVersion}
|
||||
case OpDel, OpCheck:
|
||||
// no-op success
|
||||
default:
|
||||
resp.Error = "unknown op"
|
||||
}
|
||||
done <- EncodeResponse(conn, resp)
|
||||
}()
|
||||
|
||||
// Point the client at our test socket.
|
||||
prev := rpcSocket
|
||||
rpcSocket = sockPath
|
||||
defer func() { rpcSocket = prev }()
|
||||
|
||||
resp, err := call(Request{Op: OpAdd, ContainerID: "test"})
|
||||
if err != nil {
|
||||
t.Fatalf("call: %v", err)
|
||||
}
|
||||
if resp.Result == nil {
|
||||
t.Fatalf("expected result, got %+v", resp)
|
||||
}
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
t.Fatalf("server: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("server did not finish")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRPC_ServerErrorPropagatesToCNIError(t *testing.T) {
|
||||
resp := &Response{Error: "no NodeConfig for host001"}
|
||||
err := toCNIError("add", resp)
|
||||
if err == nil {
|
||||
t.Fatal("expected CNI error")
|
||||
}
|
||||
if got := err.Error(); got == "" || got == "no NodeConfig for host001" {
|
||||
// types.Error wraps the message — just make sure something non-empty
|
||||
// surfaces and that the underlying string is contained.
|
||||
t.Fatalf("unexpected error format: %q", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user