Compare commits
21 Commits
39ede9130b
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 580b9afa33 | |||
| 8d6e50c980 | |||
| 3d0081780c | |||
| 9b777ca7d1 | |||
| a17d33e182 | |||
| 40e13037b5 | |||
| 4a60c004c3 | |||
| 2daa2a21f3 | |||
| 362a1e01ce | |||
| 222006240c | |||
| e00579f7ca | |||
| a6a50fd73f | |||
| c61b12204c | |||
| e9d3eef2cc | |||
| 8dd109866e | |||
| d5161e09d3 | |||
| 65b2fb5b17 | |||
| c860e9351b | |||
| a6202a36bd | |||
| a7dc7bf1f4 | |||
| 5d9b6bfeec |
+14
-45
@@ -1,55 +1,24 @@
|
|||||||
name: Build flock Image
|
name: flock
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
release:
|
||||||
runs-on: fritzlab
|
runs-on: fritzlab
|
||||||
steps:
|
steps:
|
||||||
- name: Check out repo
|
- uses: actions/checkout@v4
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Log in to Gitea registry
|
- uses: https://code.fritzlab.net/action/image-build@v1
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
with:
|
||||||
registry: code.fritzlab.net
|
image: code.fritzlab.net/fritzlab-public/flock
|
||||||
username: ci-bot
|
build-args: GIT_SHA=${{ github.sha }}
|
||||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
smoke-test: |
|
||||||
|
docker run --rm $IMAGE --help || true
|
||||||
|
docker run --rm --entrypoint /usr/local/bin/flock $IMAGE || true
|
||||||
|
|
||||||
- name: Extract Docker metadata
|
- uses: https://code.fritzlab.net/action/image-push@v1
|
||||||
id: meta
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
with:
|
||||||
images: code.fritzlab.net/fritzlab/flock
|
image: code.fritzlab.net/fritzlab-public/flock
|
||||||
tags: |
|
token: ${{ secrets.CI_BOT_TOKEN }}
|
||||||
type=raw,value=latest
|
org: fritzlab-public
|
||||||
type=raw,value=${{ github.run_number }}
|
name: flock
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
push: true
|
|
||||||
provenance: false
|
|
||||||
build-args: |
|
|
||||||
GIT_SHA=${{ github.sha }}
|
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
|
||||||
network: host
|
|
||||||
|
|
||||||
- name: Smoke-test image
|
|
||||||
run: |
|
|
||||||
docker run --rm code.fritzlab.net/fritzlab/flock:${{ github.run_number }} --help || true
|
|
||||||
docker run --rm --entrypoint /usr/local/bin/flock \
|
|
||||||
code.fritzlab.net/fritzlab/flock:${{ github.run_number }} || true
|
|
||||||
|
|
||||||
- name: Clean up old image tags
|
|
||||||
run: |
|
|
||||||
tea login add --name ci --url https://code.fritzlab.net --token '${{ secrets.CI_BOT_TOKEN }}' --no-version-check
|
|
||||||
tea api '/packages/fritzlab?type=container' \
|
|
||||||
| jq -r '.[] | select(.name=="flock") | select(.version | test("^[0-9]+$")) | .version' \
|
|
||||||
| sort -n | head -n -3 \
|
|
||||||
| while read tag; do
|
|
||||||
echo "deleting flock:$tag"
|
|
||||||
tea api -X DELETE "/packages/fritzlab/container/flock/$tag"
|
|
||||||
done
|
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
name: flock PR validation
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
jobs:
|
||||||
|
validate:
|
||||||
|
runs-on: fritzlab
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: https://code.fritzlab.net/action/image-build@v1
|
||||||
|
with:
|
||||||
|
image: code.fritzlab.net/fritzlab/flock
|
||||||
|
build-args: GIT_SHA=${{ github.sha }}
|
||||||
|
smoke-test: |
|
||||||
|
docker run --rm $IMAGE --help || true
|
||||||
|
docker run --rm --entrypoint /usr/local/bin/flock $IMAGE || true
|
||||||
@@ -2,8 +2,9 @@
|
|||||||
|
|
||||||
A small, opinionated Kubernetes CNI built around three ideas:
|
A small, opinionated Kubernetes CNI built around three ideas:
|
||||||
|
|
||||||
1. **IPv6-first.** Every pod gets a globally routable IPv6 address. IPv4 is
|
1. **Dual-stack, IPv6-friendly.** Every pod gets a globally routable IPv6
|
||||||
per-pod opt-in for legacy clients.
|
address by default. IPv4 is also enabled by default; either family can
|
||||||
|
be turned off per-node or per-pod when you really mean to.
|
||||||
2. **No tunnels, no NAT.** Pod addresses are the real packets on the wire.
|
2. **No tunnels, no NAT.** Pod addresses are the real packets on the wire.
|
||||||
Each node speaks BGP to its upstream router and advertises its own
|
Each node speaks BGP to its upstream router and advertises its own
|
||||||
per-node prefix. The pod network is just the LAN, plus host routes.
|
per-node prefix. The pod network is just the LAN, plus host routes.
|
||||||
@@ -127,7 +128,7 @@ spec:
|
|||||||
- 192.0.2.0/24 # IPv4 pool, used only when a pod opts in.
|
- 192.0.2.0/24 # IPv4 pool, used unless IPv4 is disabled for the node or pod.
|
||||||
defaults:
|
defaults:
|
||||||
ipv6: true # Optional. Built-in baseline if omitted.
|
ipv6: true # Optional. Built-in baseline if omitted.
|
||||||
ipv4: false # Optional. Built-in baseline if omitted.
|
ipv4: true # Optional. Built-in baseline if omitted.
|
||||||
bgp:
|
bgp:
|
||||||
asn: 65101 # This node's local ASN.
|
asn: 65101 # This node's local ASN.
|
||||||
peers:
|
peers:
|
||||||
@@ -143,12 +144,12 @@ spec:
|
|||||||
on this node — i.e. when the pod has no explicit `flock.fritzlab.net/ipv6`
|
on this node — i.e. when the pod has no explicit `flock.fritzlab.net/ipv6`
|
||||||
or `flock.fritzlab.net/ipv4` annotation. Pod annotations always override.
|
or `flock.fritzlab.net/ipv4` annotation. Pod annotations always override.
|
||||||
If you omit `spec.defaults` (or any individual field inside it) flock
|
If you omit `spec.defaults` (or any individual field inside it) flock
|
||||||
falls back to its built-in baseline of **IPv6 on, IPv4 off**.
|
falls back to its built-in baseline of **dual-stack (IPv6 on, IPv4 on)**.
|
||||||
|
|
||||||
| Goal | `spec.defaults` |
|
| Goal | `spec.defaults` |
|
||||||
|---------------------------|----------------------------------------|
|
|-----------------------------------|----------------------------------------|
|
||||||
| IPv6-only (the default) | omit, or `{ ipv6: true, ipv4: false }`|
|
| Dual-stack (the default) | omit, or `{ ipv6: true, ipv4: true }` |
|
||||||
| Dual-stack by default | `{ ipv6: true, ipv4: true }` |
|
| IPv6-only node | `{ ipv6: true, ipv4: false }` |
|
||||||
| IPv4-only (legacy node) | `{ ipv6: false, ipv4: true }` |
|
| IPv4-only (legacy node) | `{ ipv6: false, ipv4: true }` |
|
||||||
|
|
||||||
A NodeConfig that resolves to "neither family" is rejected at allocation
|
A NodeConfig that resolves to "neither family" is rejected at allocation
|
||||||
@@ -175,15 +176,51 @@ optional; leave them off to inherit the per-node defaults.
|
|||||||
| `flock.fritzlab.net/cidr4` | CIDRs | Restrict IPv4 allocation to a sub-range of the node's `cidr4`. Comma-separated. |
|
| `flock.fritzlab.net/cidr4` | CIDRs | Restrict IPv4 allocation to a sub-range of the node's `cidr4`. Comma-separated. |
|
||||||
| `flock.fritzlab.net/ip-algo` | list | Embed identity into the IPv6 IID. Subset of `namespace,pod,image`, in order, comma-separated. |
|
| `flock.fritzlab.net/ip-algo` | list | Embed identity into the IPv6 IID. Subset of `namespace,pod,image`, in order, comma-separated. |
|
||||||
| `flock.fritzlab.net/anycast` | IPs | Bind these IPs on the pod's `lo`; advertise via BGP while pod is `Ready`. Mixed v6+v4 ok. |
|
| `flock.fritzlab.net/anycast` | IPs | Bind these IPs on the pod's `lo`; advertise via BGP while pod is `Ready`. Mixed v6+v4 ok. |
|
||||||
|
| `flock.fritzlab.net/addresses` | IPs | Bind these IPs on the pod's `eth0`. The first v6 and first v4 **replace** IPAM allocation for that family — the supplied address becomes the pod's primary IP for that family. Mixed v6+v4 ok. Single-replica only in practice. |
|
||||||
|
|
||||||
Bool values must be the literal strings `"true"` or `"false"`
|
Bool values must be the literal strings `"true"` or `"false"`
|
||||||
(case-insensitive, surrounding whitespace tolerated). Other values —
|
(case-insensitive, surrounding whitespace tolerated). Other values —
|
||||||
`1`, `0`, `yes`, `no` — are rejected so a typo can't silently flip
|
`1`, `0`, `yes`, `no` — are rejected so a typo can't silently flip
|
||||||
behaviour.
|
behaviour.
|
||||||
|
|
||||||
|
### `addresses` vs `anycast`
|
||||||
|
|
||||||
|
Both annotations bind operator-supplied IPs onto a pod and have flock
|
||||||
|
advertise `/128` (or `/32`) per-pod over BGP. The differences are
|
||||||
|
where the IP lands and what it's for:
|
||||||
|
|
||||||
|
| | `anycast` | `addresses` |
|
||||||
|
|----------------------------|----------------------------------------------------|-------------------------------------------------------------------|
|
||||||
|
| Bound on | pod `lo` | pod `eth0` |
|
||||||
|
| Multi-replica? | yes — every Ready replica advertises the same IP and the upstream router ECMPs across them | no — the same IP on multiple replicas is operator error |
|
||||||
|
| Replaces IPAM? | no — pod still has an IPAM-allocated unicast IP | **yes** — the first v6 + first v4 in the list become the pod's primary IPs in place of an IPAM allocation |
|
||||||
|
| Workload visibility | only the IPAM IP is on the primary interface | the public IP is `eth0`'s primary address — workloads that read their own NIC see it (e.g. Plex's remote-access detection) |
|
||||||
|
|
||||||
|
Use `anycast` for shared services with many replicas (DNS, ingress).
|
||||||
|
Use `addresses` when one specific pod needs a known public IP that the
|
||||||
|
workload itself must see on its primary interface.
|
||||||
|
|
||||||
|
### Conflict detection
|
||||||
|
|
||||||
|
`addresses` and `anycast` reject pods that supply an IP whose family is
|
||||||
|
disabled. If the resolved `WantV4` is false (via the pod's `ipv4`
|
||||||
|
annotation or the NodeConfig default) and any addresses- or
|
||||||
|
anycast-supplied IP is IPv4, the CNI ADD fails with an explicit error.
|
||||||
|
Same for v6. Both annotation types put IPs on a pod interface and rely
|
||||||
|
on the family being enabled for return-path routing — silently accepting
|
||||||
|
the IP would leave a non-functional pod.
|
||||||
|
|
||||||
|
### Outside-aggregate advertisement
|
||||||
|
|
||||||
|
When an `addresses` IP replaces IPAM (becomes the pod's primary IP) the
|
||||||
|
IP is typically **outside** the node's BGP aggregate (e.g. a public
|
||||||
|
`/32` on a node whose pod CIDR is private). flock notices this during
|
||||||
|
BGP rendering and advertises the IP individually as a per-pod `/32` or
|
||||||
|
`/128` so the upstream router has a route to it.
|
||||||
|
|
||||||
### Example pods
|
### Example pods
|
||||||
|
|
||||||
Default IPv6-only — no annotations needed:
|
Default dual-stack — no annotations needed:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -192,15 +229,15 @@ metadata:
|
|||||||
name: minimal
|
name: minimal
|
||||||
```
|
```
|
||||||
|
|
||||||
Dual-stack on a node whose default is IPv6-only:
|
IPv6 only — opt out of the default v4 allocation:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Pod
|
kind: Pod
|
||||||
metadata:
|
metadata:
|
||||||
name: legacy-client
|
name: v6-only
|
||||||
annotations:
|
annotations:
|
||||||
flock.fritzlab.net/ipv4: "true"
|
flock.fritzlab.net/ipv4: "false"
|
||||||
```
|
```
|
||||||
|
|
||||||
Operator-friendly addressing — `fnv(namespace) | fnv(pod) | random`
|
Operator-friendly addressing — `fnv(namespace) | fnv(pod) | random`
|
||||||
@@ -227,7 +264,6 @@ spec:
|
|||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
annotations:
|
annotations:
|
||||||
flock.fritzlab.net/ipv4: "true"
|
|
||||||
flock.fritzlab.net/anycast: "2001:db8:a::53, 192.0.2.53"
|
flock.fritzlab.net/anycast: "2001:db8:a::53, 192.0.2.53"
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
@@ -239,6 +275,29 @@ spec:
|
|||||||
failureThreshold: 1
|
failureThreshold: 1
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Workload with a known public IP — single-replica pod whose application
|
||||||
|
inspects its own primary interface (Plex's remote-access flow). The
|
||||||
|
addresses become the pod's primary IPs in place of any IPAM allocation;
|
||||||
|
the pod's `eth0` ends up with exactly the supplied addresses, and BGP
|
||||||
|
advertises them as a `/128` and `/32`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: plex
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
flock.fritzlab.net/addresses: "2001:db8:c606::166, 192.0.2.166"
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: plex
|
||||||
|
image: plexinc/pms-docker
|
||||||
|
```
|
||||||
|
|
||||||
## Use cases
|
## Use cases
|
||||||
|
|
||||||
**Highly-available DNS.** Run N CoreDNS replicas, each annotated with
|
**Highly-available DNS.** Run N CoreDNS replicas, each annotated with
|
||||||
@@ -274,28 +333,32 @@ annotation and the pod gets a normal allocation.
|
|||||||
|
|
||||||
| | flock | Calico | Cilium |
|
| | flock | Calico | Cilium |
|
||||||
|--------------------------|-----------------------------|------------------------------|------------------------------|
|
|--------------------------|-----------------------------|------------------------------|------------------------------|
|
||||||
| Default address family | IPv6 | IPv4 | dual |
|
| Default address family | dual (IPv6+IPv4) | IPv4 | dual |
|
||||||
| BGP | yes (BIRD) | yes | optional |
|
| BGP | yes (BIRD) | yes | optional |
|
||||||
| Overlay (VXLAN/IPIP) | never | optional | yes (geneve) or native |
|
| Overlay (VXLAN/IPIP) | never | optional | yes (geneve) or native |
|
||||||
| NAT in datapath | never | masquerade by default | masquerade by default |
|
| NAT in datapath | never | masquerade by default | masquerade by default |
|
||||||
| Anycast pod addressing | first-class | manual | optional, via service mesh |
|
| Anycast pod addressing | first-class | manual | optional, via service mesh |
|
||||||
| eBPF datapath | no | optional | yes |
|
| eBPF datapath | no | optional | yes |
|
||||||
| NetworkPolicy | not yet | yes (Felix) | yes (eBPF) |
|
| NetworkPolicy | yes (nftables) | yes (Felix) | yes (eBPF) |
|
||||||
| Cluster size target | small (< 100 nodes) | thousands | thousands |
|
| Cluster size target | small (< 100 nodes) | thousands | thousands |
|
||||||
| Operational surface area | low (1 DaemonSet, 1 CRD) | medium | high |
|
| Operational surface area | low (1 DaemonSet, 1 CRD) | medium | high |
|
||||||
| Production-ready | alpha | yes | yes |
|
| Production-ready | alpha | yes | yes |
|
||||||
|
|
||||||
flock is not trying to compete with Calico or Cilium. The right answer
|
flock is not trying to compete with Calico or Cilium. The right answer
|
||||||
for most clusters is one of those two — flock exists for clusters where
|
for most clusters is one of those two — flock exists for clusters where
|
||||||
every node already speaks BGP, the operator wants to think in IPv6-first
|
every node already speaks BGP, the operator wants real (no NAT) IPv6
|
||||||
terms, and per-pod anycast is something they actually want to use rather
|
addressing on every pod, and per-pod anycast is something they actually
|
||||||
than work around.
|
want to use rather than work around.
|
||||||
|
|
||||||
## Limitations and non-goals
|
## Limitations and non-goals
|
||||||
|
|
||||||
- No NetworkPolicy enforcement yet (planned).
|
- NetworkPolicy supports `networking.k8s.io/v1` (ingress + egress, all
|
||||||
- No NAT, no masquerade, no SNAT-egress. If your pods need to reach a
|
three peer types, numeric ports + port ranges). Named ports and
|
||||||
legacy IPv4-only destination, give them an IPv4 address explicitly.
|
AdminNetworkPolicy are not yet implemented.
|
||||||
|
- No NAT, no masquerade, no SNAT-egress. Pods reach the wider internet
|
||||||
|
using their real cluster-routable addresses; if your IPv4 pool isn't
|
||||||
|
routable beyond your network, those pods can't reach v4-only hosts on
|
||||||
|
the public internet without help from your border router.
|
||||||
- No multi-cluster, no peering across clusters.
|
- No multi-cluster, no peering across clusters.
|
||||||
- Linux-only datapath.
|
- Linux-only datapath.
|
||||||
- IPAM is per-node — there's no global allocator and no IP mobility.
|
- IPAM is per-node — there's no global allocator and no IP mobility.
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ spec:
|
|||||||
when its own annotations don't specify. Pod annotations
|
when its own annotations don't specify. Pod annotations
|
||||||
flock.fritzlab.net/ipv6 and flock.fritzlab.net/ipv4 always
|
flock.fritzlab.net/ipv6 and flock.fritzlab.net/ipv4 always
|
||||||
override these defaults. Built-in fallback (when this block
|
override these defaults. Built-in fallback (when this block
|
||||||
or any field is omitted) is IPv6=true, IPv4=false.
|
or any field is omitted) is IPv6=true, IPv4=true (dual-stack).
|
||||||
properties:
|
properties:
|
||||||
ipv6:
|
ipv6:
|
||||||
type: boolean
|
type: boolean
|
||||||
@@ -56,7 +56,7 @@ spec:
|
|||||||
type: boolean
|
type: boolean
|
||||||
description: |
|
description: |
|
||||||
Default IPv4 inclusion for pods on this node. Omit to
|
Default IPv4 inclusion for pods on this node. Omit to
|
||||||
inherit the built-in baseline (false).
|
inherit the built-in baseline (true).
|
||||||
bgp:
|
bgp:
|
||||||
type: object
|
type: object
|
||||||
required: [asn, peers]
|
required: [asn, peers]
|
||||||
|
|||||||
+4
-13
@@ -41,19 +41,10 @@ spec:
|
|||||||
nodeSelector:
|
nodeSelector:
|
||||||
flock.fritzlab.net/agent: ""
|
flock.fritzlab.net/agent: ""
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: fritzlab.net/cni-test
|
# CNI must schedule on a fresh node before it becomes Ready —
|
||||||
operator: Equal
|
# the node has not-ready:NoSchedule until flock installs the CNI conflist.
|
||||||
value: "true"
|
# Catch-all tolerates all taints so the agent always runs.
|
||||||
effect: NoSchedule
|
- operator: Exists
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
- key: node.kubernetes.io/not-ready
|
|
||||||
operator: Exists
|
|
||||||
effect: NoExecute
|
|
||||||
- key: node.kubernetes.io/unreachable
|
|
||||||
operator: Exists
|
|
||||||
effect: NoExecute
|
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: install-cni
|
- name: install-cni
|
||||||
image: code.fritzlab.net/fritzlab/flock:latest
|
image: code.fritzlab.net/fritzlab/flock:latest
|
||||||
|
|||||||
+9
-15
@@ -45,7 +45,7 @@ spec:
|
|||||||
when its own annotations don't specify. Pod annotations
|
when its own annotations don't specify. Pod annotations
|
||||||
flock.fritzlab.net/ipv6 and flock.fritzlab.net/ipv4 always
|
flock.fritzlab.net/ipv6 and flock.fritzlab.net/ipv4 always
|
||||||
override these defaults. Built-in fallback (when this block
|
override these defaults. Built-in fallback (when this block
|
||||||
or any field is omitted) is IPv6=true, IPv4=false.
|
or any field is omitted) is IPv6=true, IPv4=true (dual-stack).
|
||||||
properties:
|
properties:
|
||||||
ipv6:
|
ipv6:
|
||||||
type: boolean
|
type: boolean
|
||||||
@@ -56,7 +56,7 @@ spec:
|
|||||||
type: boolean
|
type: boolean
|
||||||
description: |
|
description: |
|
||||||
Default IPv4 inclusion for pods on this node. Omit to
|
Default IPv4 inclusion for pods on this node. Omit to
|
||||||
inherit the built-in baseline (false).
|
inherit the built-in baseline (true).
|
||||||
bgp:
|
bgp:
|
||||||
type: object
|
type: object
|
||||||
required: [asn, peers]
|
required: [asn, peers]
|
||||||
@@ -119,6 +119,9 @@ rules:
|
|||||||
- apiGroups: ["networking.k8s.io"]
|
- apiGroups: ["networking.k8s.io"]
|
||||||
resources: ["networkpolicies"]
|
resources: ["networkpolicies"]
|
||||||
verbs: ["get", "list", "watch"]
|
verbs: ["get", "list", "watch"]
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["namespaces"]
|
||||||
|
verbs: ["get", "list", "watch"]
|
||||||
- apiGroups: [""]
|
- apiGroups: [""]
|
||||||
resources: ["nodes/status"]
|
resources: ["nodes/status"]
|
||||||
verbs: ["patch"]
|
verbs: ["patch"]
|
||||||
@@ -179,19 +182,10 @@ spec:
|
|||||||
nodeSelector:
|
nodeSelector:
|
||||||
flock.fritzlab.net/agent: ""
|
flock.fritzlab.net/agent: ""
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: fritzlab.net/cni-test
|
# CNI must schedule on a fresh node before it becomes Ready —
|
||||||
operator: Equal
|
# the node has not-ready:NoSchedule until flock installs the CNI conflist.
|
||||||
value: "true"
|
# Catch-all tolerates all taints so the agent always runs.
|
||||||
effect: NoSchedule
|
- operator: Exists
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
- key: node.kubernetes.io/not-ready
|
|
||||||
operator: Exists
|
|
||||||
effect: NoExecute
|
|
||||||
- key: node.kubernetes.io/unreachable
|
|
||||||
operator: Exists
|
|
||||||
effect: NoExecute
|
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: install-cni
|
- name: install-cni
|
||||||
image: code.fritzlab.net/fritzlab/flock:latest
|
image: code.fritzlab.net/fritzlab/flock:latest
|
||||||
|
|||||||
+131
-31
@@ -2,6 +2,7 @@ package agent
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -21,12 +22,13 @@ const (
|
|||||||
annCIDR4 = "cidr4"
|
annCIDR4 = "cidr4"
|
||||||
annIPAlgo = "ip-algo"
|
annIPAlgo = "ip-algo"
|
||||||
annAnycast = "anycast"
|
annAnycast = "anycast"
|
||||||
|
annAddresses = "addresses"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FamilyDefaults is the per-call baseline for whether a pod receives an IPv6
|
// FamilyDefaults is the per-call baseline for whether a pod receives an IPv6
|
||||||
// and/or IPv4 address. It is the merge of:
|
// and/or IPv4 address. It is the merge of:
|
||||||
//
|
//
|
||||||
// 1. flock's built-in baseline (IPv6=true, IPv4=false), then
|
// 1. flock's built-in baseline (IPv6=true, IPv4=true — dual-stack), then
|
||||||
// 2. any NodeConfig.Spec.Defaults override the operator has applied to
|
// 2. any NodeConfig.Spec.Defaults override the operator has applied to
|
||||||
// the local node.
|
// the local node.
|
||||||
//
|
//
|
||||||
@@ -43,13 +45,17 @@ type FamilyDefaults struct {
|
|||||||
WantV4 bool
|
WantV4 bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// BuiltinFamilyDefaults returns flock's hard-coded fallback: IPv6 only.
|
// BuiltinFamilyDefaults returns flock's hard-coded fallback: dual-stack
|
||||||
// This is the policy applied when no NodeConfig override is in effect.
|
// (IPv6 + IPv4). This is the policy applied when no NodeConfig override is
|
||||||
|
// in effect. Pods that want a single family explicitly opt out via the
|
||||||
|
// `flock.fritzlab.net/ipv6` or `flock.fritzlab.net/ipv4` annotation, or
|
||||||
|
// the operator narrows the fallback at the node level via
|
||||||
|
// NodeConfig.Spec.Defaults.
|
||||||
//
|
//
|
||||||
// We define it as a function rather than a var so callers can't mutate the
|
// We define it as a function rather than a var so callers can't mutate the
|
||||||
// shared baseline at runtime.
|
// shared baseline at runtime.
|
||||||
func BuiltinFamilyDefaults() FamilyDefaults {
|
func BuiltinFamilyDefaults() FamilyDefaults {
|
||||||
return FamilyDefaults{WantV6: true, WantV4: false}
|
return FamilyDefaults{WantV6: true, WantV4: true}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FamilyDefaultsFromNodeConfig resolves the effective per-node defaults,
|
// FamilyDefaultsFromNodeConfig resolves the effective per-node defaults,
|
||||||
@@ -83,12 +89,16 @@ type ParsedAnnotations struct {
|
|||||||
CIDR6 []*net.IPNet
|
CIDR6 []*net.IPNet
|
||||||
// CIDR4 narrows IPv4 allocation. nil/empty means "use any node CIDR4".
|
// CIDR4 narrows IPv4 allocation. nil/empty means "use any node CIDR4".
|
||||||
CIDR4 []*net.IPNet
|
CIDR4 []*net.IPNet
|
||||||
// IPAlgo is the ordered list of identity fields used to build the IID.
|
|
||||||
// nil/empty means "random IID".
|
|
||||||
IPAlgo []embed.Field
|
|
||||||
// Anycast is the set of anycast IPs to bind on the pod's loopback.
|
// Anycast is the set of anycast IPs to bind on the pod's loopback.
|
||||||
// nil/empty means "no anycast".
|
// nil/empty means "no anycast".
|
||||||
Anycast []net.IP
|
Anycast []net.IP
|
||||||
|
// Addresses is the set of additional IPs to bind directly on the pod's
|
||||||
|
// eth0. BGP advertisement (/128+/32) is identical to Anycast; the only
|
||||||
|
// difference is that these IPs land on the primary interface instead of
|
||||||
|
// lo. Use this when the workload needs the IP directly visible on eth0
|
||||||
|
// (e.g. Plex, which inspects its own interfaces for remote-access setup).
|
||||||
|
// nil/empty means "no extra addresses".
|
||||||
|
Addresses []net.IP
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseAnnotations applies the supplied per-node defaults and validates the
|
// ParseAnnotations applies the supplied per-node defaults and validates the
|
||||||
@@ -140,14 +150,6 @@ func ParseAnnotations(in map[string]string, defaults FamilyDefaults) (*ParsedAnn
|
|||||||
out.CIDR4 = nets
|
out.CIDR4 = nets
|
||||||
}
|
}
|
||||||
|
|
||||||
if v, ok := in[annotationPrefix+annIPAlgo]; ok {
|
|
||||||
fields, err := parseIPAlgo(v)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("annotation %s: %w", annIPAlgo, err)
|
|
||||||
}
|
|
||||||
out.IPAlgo = fields
|
|
||||||
}
|
|
||||||
|
|
||||||
if v, ok := in[annotationPrefix+annAnycast]; ok {
|
if v, ok := in[annotationPrefix+annAnycast]; ok {
|
||||||
ips, err := parseIPList(v)
|
ips, err := parseIPList(v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -156,9 +158,52 @@ func ParseAnnotations(in map[string]string, defaults FamilyDefaults) (*ParsedAnn
|
|||||||
out.Anycast = ips
|
out.Anycast = ips
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if v, ok := in[annotationPrefix+annAddresses]; ok {
|
||||||
|
ips, err := parseIPList(v)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("annotation %s: %w", annAddresses, err)
|
||||||
|
}
|
||||||
|
out.Addresses = ips
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reject pods that ask for an addresses- or anycast-supplied IP whose
|
||||||
|
// family was disabled (via the pod's ipv6/ipv4 annotation or NodeConfig
|
||||||
|
// default). Both annotation types put the IP on a pod interface and rely
|
||||||
|
// on the family being enabled for return-path routing — addresses needs
|
||||||
|
// the in-pod default v6/v4 route to send replies; anycast on lo needs
|
||||||
|
// the same default route on eth0 for the same reason. Silently accepting
|
||||||
|
// the IP would leave a non-functional pod, so we fail closed at ADD.
|
||||||
|
for _, ip := range out.Addresses {
|
||||||
|
if err := requireFamilyEnabled(ip, out.WantV6, out.WantV4, annAddresses); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, ip := range out.Anycast {
|
||||||
|
if err := requireFamilyEnabled(ip, out.WantV6, out.WantV4, annAnycast); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// requireFamilyEnabled returns an error when ip's family was opted out via
|
||||||
|
// the resolved WantV6/WantV4 booleans (pod annotation > NodeConfig default >
|
||||||
|
// built-in dual-stack). The source string identifies which annotation
|
||||||
|
// supplied the conflicting IP so the operator's error message is specific.
|
||||||
|
func requireFamilyEnabled(ip net.IP, wantV6, wantV4 bool, source string) error {
|
||||||
|
if ip.To4() != nil {
|
||||||
|
if !wantV4 {
|
||||||
|
return fmt.Errorf("annotation %s: contains IPv4 %s but ipv4 is disabled (annotation or NodeConfig default)", source, ip)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !wantV6 {
|
||||||
|
return fmt.Errorf("annotation %s: contains IPv6 %s but ipv6 is disabled (annotation or NodeConfig default)", source, ip)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// parseBoolAnnotation accepts only "true" or "false" (case-insensitive,
|
// parseBoolAnnotation accepts only "true" or "false" (case-insensitive,
|
||||||
// surrounding whitespace tolerated). All other values — including "1", "0",
|
// surrounding whitespace tolerated). All other values — including "1", "0",
|
||||||
// "yes", "no" — are rejected so operator typos are caught loudly rather
|
// "yes", "no" — are rejected so operator typos are caught loudly rather
|
||||||
@@ -243,30 +288,85 @@ func parseIPList(s string) ([]net.IP, error) {
|
|||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseIPAlgo parses the ip-algo annotation. Each comma-separated token must
|
// ResolveIPAlgo resolves the effective ip-algo for a pod. Precedence:
|
||||||
// match one of: namespace, pod, image. Empty tokens are dropped; unknown
|
//
|
||||||
// tokens are reported.
|
// pod annotation → NodeConfig annotation → nil (random IID).
|
||||||
func parseIPAlgo(s string) ([]embed.Field, error) {
|
//
|
||||||
|
// Empty, missing, or invalid annotations at any level fall through to the
|
||||||
|
// next. Invalid input emits a warning via log; a nil log is silent. A nil
|
||||||
|
// return value means "no algo, generate a fully random IID".
|
||||||
|
//
|
||||||
|
// "Invalid" is everything tryParseIPAlgo cannot turn into a non-empty,
|
||||||
|
// duplicate-free subset of {namespace, pod, image} — unrecognised tokens,
|
||||||
|
// duplicates, lists that resolve to zero fields after trimming.
|
||||||
|
func ResolveIPAlgo(podAnn, nodeAnn map[string]string, log *slog.Logger) []embed.Field {
|
||||||
|
if v, ok := podAnn[annotationPrefix+annIPAlgo]; ok {
|
||||||
|
if fields := tryParseIPAlgo(v); fields != nil {
|
||||||
|
return fields
|
||||||
|
}
|
||||||
|
warnIPAlgo(log, "pod", v)
|
||||||
|
}
|
||||||
|
if v, ok := nodeAnn[annotationPrefix+annIPAlgo]; ok {
|
||||||
|
if fields := tryParseIPAlgo(v); fields != nil {
|
||||||
|
return fields
|
||||||
|
}
|
||||||
|
warnIPAlgo(log, "NodeConfig", v)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// warnIPAlgo logs a single warning when an ip-algo annotation is present
|
||||||
|
// but cannot be parsed. Empty values are not worth a warn — they are
|
||||||
|
// treated the same as "key absent" by design, so we
|
||||||
|
// only warn when a non-empty value failed parsing.
|
||||||
|
func warnIPAlgo(log *slog.Logger, source, value string) {
|
||||||
|
if log == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(value) == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn("ignoring invalid ip-algo annotation; falling through",
|
||||||
|
"source", source, "value", value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryParseIPAlgo parses an ip-algo annotation value under the relaxed
|
||||||
|
// "invalid → unset" rules. Returns nil for: empty input, unrecognised
|
||||||
|
// tokens, duplicate fields, or anything that resolves to zero fields after
|
||||||
|
// trimming. Returns the ordered field list otherwise.
|
||||||
|
//
|
||||||
|
// Duplicates collapse to nil rather than dedup-and-keep so the operator
|
||||||
|
// notices their malformed annotation via the warn log instead of silently
|
||||||
|
// losing a field they thought they had specified.
|
||||||
|
func tryParseIPAlgo(s string) []embed.Field {
|
||||||
var out []embed.Field
|
var out []embed.Field
|
||||||
|
seen := map[embed.Field]struct{}{}
|
||||||
for _, part := range strings.Split(s, ",") {
|
for _, part := range strings.Split(s, ",") {
|
||||||
part = strings.TrimSpace(part)
|
part = strings.TrimSpace(part)
|
||||||
switch part {
|
if part == "" {
|
||||||
case "":
|
|
||||||
continue
|
continue
|
||||||
case string(embed.FieldNamespace):
|
|
||||||
out = append(out, embed.FieldNamespace)
|
|
||||||
case string(embed.FieldPod):
|
|
||||||
out = append(out, embed.FieldPod)
|
|
||||||
case string(embed.FieldImage):
|
|
||||||
out = append(out, embed.FieldImage)
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unknown ip-algo field %q (allowed: namespace, pod, image)", part)
|
|
||||||
}
|
}
|
||||||
|
var f embed.Field
|
||||||
|
switch part {
|
||||||
|
case string(embed.FieldNamespace):
|
||||||
|
f = embed.FieldNamespace
|
||||||
|
case string(embed.FieldApp):
|
||||||
|
f = embed.FieldApp
|
||||||
|
case string(embed.FieldImage):
|
||||||
|
f = embed.FieldImage
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if _, dup := seen[f]; dup {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seen[f] = struct{}{}
|
||||||
|
out = append(out, f)
|
||||||
}
|
}
|
||||||
if len(out) == 0 {
|
if len(out) == 0 {
|
||||||
return nil, fmt.Errorf("empty ip-algo")
|
return nil
|
||||||
}
|
}
|
||||||
return out, nil
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
// CNIArgs is the typed view of the K=V;K=V CNI_ARGS string passed by kubelet.
|
// CNIArgs is the typed view of the K=V;K=V CNI_ARGS string passed by kubelet.
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// FuzzParseAnnotations explores the joint space of {ipv6, ipv4, cidr6, cidr4,
|
// FuzzParseAnnotations explores the joint space of {ipv6, ipv4, cidr6, cidr4,
|
||||||
// ip-algo, anycast} annotations with random byte strings. Every recognised
|
// anycast} annotations with random byte strings. ip-algo is handled by
|
||||||
// key is exercised by deriving a deterministic input map from the fuzzed
|
// ResolveIPAlgo (separate fuzz target below) and is no longer touched by
|
||||||
// bytes; this gives the fuzzer reach into all parser branches at once.
|
// ParseAnnotations. Every recognised key is exercised by deriving a
|
||||||
|
// deterministic input map from the fuzzed bytes.
|
||||||
//
|
//
|
||||||
// Properties checked:
|
// Properties checked:
|
||||||
//
|
//
|
||||||
@@ -15,15 +16,15 @@ import (
|
|||||||
// 2. On nil-error return, the result satisfies the design-doc invariant
|
// 2. On nil-error return, the result satisfies the design-doc invariant
|
||||||
// that at least one of WantV6 / WantV4 is true (a pod always has at
|
// that at least one of WantV6 / WantV4 is true (a pod always has at
|
||||||
// least one address).
|
// least one address).
|
||||||
// 3. Anycast IPs and IPAlgo fields are non-nil/empty only when the
|
// 3. Anycast IPs and CIDR slices are non-nil/empty only when the
|
||||||
// annotation was supplied; never spontaneously populated.
|
// annotation was supplied; never spontaneously populated.
|
||||||
//
|
//
|
||||||
// Seed corpus covers known edge cases the spec must handle.
|
// Seed corpus covers known edge cases the spec must handle.
|
||||||
func FuzzParseAnnotations(f *testing.F) {
|
func FuzzParseAnnotations(f *testing.F) {
|
||||||
// Seeds: each entry is six strings — the literal raw values for the
|
// Seeds: each entry is five strings — the literal raw values for the
|
||||||
// six parsed keys. Empty string for "key absent".
|
// five parsed keys. Empty string for "key absent".
|
||||||
type seed struct {
|
type seed struct {
|
||||||
ipv6, ipv4, cidr6, cidr4, ipAlgo, anycast string
|
ipv6, ipv4, cidr6, cidr4, anycast string
|
||||||
}
|
}
|
||||||
seeds := []seed{
|
seeds := []seed{
|
||||||
{},
|
{},
|
||||||
@@ -43,11 +44,6 @@ func FuzzParseAnnotations(f *testing.F) {
|
|||||||
{cidr4: "172.25.210.0/24"}, // valid
|
{cidr4: "172.25.210.0/24"}, // valid
|
||||||
{cidr4: "172.25.210.0/24,172.25.211.0/24"}, // multiple
|
{cidr4: "172.25.210.0/24,172.25.211.0/24"}, // multiple
|
||||||
{cidr4: "2602:817::/32"}, // family mismatch
|
{cidr4: "2602:817::/32"}, // family mismatch
|
||||||
{ipAlgo: "namespace,pod,image"},
|
|
||||||
{ipAlgo: "namespace, pod , image"}, // whitespace
|
|
||||||
{ipAlgo: "namespace,unknown"}, // invalid
|
|
||||||
{ipAlgo: ""}, // invalid (empty)
|
|
||||||
{ipAlgo: ","}, // invalid
|
|
||||||
{anycast: "2602:817:3000:ac::1"},
|
{anycast: "2602:817:3000:ac::1"},
|
||||||
{anycast: "2602:817:3000:ac::1, 172.25.255.1"},
|
{anycast: "2602:817:3000:ac::1, 172.25.255.1"},
|
||||||
{anycast: "::1"}, // loopback (allowed at parse time)
|
{anycast: "::1"}, // loopback (allowed at parse time)
|
||||||
@@ -62,15 +58,14 @@ func FuzzParseAnnotations(f *testing.F) {
|
|||||||
{anycast: "\x00\x00"},
|
{anycast: "\x00\x00"},
|
||||||
// Unicode
|
// Unicode
|
||||||
{ipv4: "trüe"},
|
{ipv4: "trüe"},
|
||||||
{ipAlgo: "námespace"},
|
|
||||||
// Very long
|
// Very long
|
||||||
{cidr6: longString("2602:817:3000:f001::/64,", 4096)},
|
{cidr6: longString("2602:817:3000:f001::/64,", 4096)},
|
||||||
}
|
}
|
||||||
for _, s := range seeds {
|
for _, s := range seeds {
|
||||||
f.Add(s.ipv6, s.ipv4, s.cidr6, s.cidr4, s.ipAlgo, s.anycast)
|
f.Add(s.ipv6, s.ipv4, s.cidr6, s.cidr4, s.anycast)
|
||||||
}
|
}
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, ipv6, ipv4, cidr6, cidr4, ipAlgo, anycast string) {
|
f.Fuzz(func(t *testing.T, ipv6, ipv4, cidr6, cidr4, anycast string) {
|
||||||
in := map[string]string{}
|
in := map[string]string{}
|
||||||
// Treat empty as "key absent" so the seed table matches the run-time
|
// Treat empty as "key absent" so the seed table matches the run-time
|
||||||
// shape; Kubernetes annotations cannot have a nil value but they CAN
|
// shape; Kubernetes annotations cannot have a nil value but they CAN
|
||||||
@@ -88,9 +83,6 @@ func FuzzParseAnnotations(f *testing.F) {
|
|||||||
if cidr4 != "" {
|
if cidr4 != "" {
|
||||||
in[annotationPrefix+annCIDR4] = cidr4
|
in[annotationPrefix+annCIDR4] = cidr4
|
||||||
}
|
}
|
||||||
if ipAlgo != "" {
|
|
||||||
in[annotationPrefix+annIPAlgo] = ipAlgo
|
|
||||||
}
|
|
||||||
if anycast != "" {
|
if anycast != "" {
|
||||||
in[annotationPrefix+annAnycast] = anycast
|
in[annotationPrefix+annAnycast] = anycast
|
||||||
}
|
}
|
||||||
@@ -104,9 +96,6 @@ func FuzzParseAnnotations(f *testing.F) {
|
|||||||
t.Fatalf("parser accepted but produced no family: in=%#v", in)
|
t.Fatalf("parser accepted but produced no family: in=%#v", in)
|
||||||
}
|
}
|
||||||
// Property: optional fields populated only when their key was set.
|
// Property: optional fields populated only when their key was set.
|
||||||
if _, hasAlgo := in[annotationPrefix+annIPAlgo]; !hasAlgo && len(got.IPAlgo) != 0 {
|
|
||||||
t.Fatalf("IPAlgo populated without annotation")
|
|
||||||
}
|
|
||||||
if _, hasAny := in[annotationPrefix+annAnycast]; !hasAny && len(got.Anycast) != 0 {
|
if _, hasAny := in[annotationPrefix+annAnycast]; !hasAny && len(got.Anycast) != 0 {
|
||||||
t.Fatalf("Anycast populated without annotation")
|
t.Fatalf("Anycast populated without annotation")
|
||||||
}
|
}
|
||||||
|
|||||||
+210
-25
@@ -13,8 +13,8 @@ func boolPtr(b bool) *bool { return &b }
|
|||||||
|
|
||||||
func TestBuiltinFamilyDefaults(t *testing.T) {
|
func TestBuiltinFamilyDefaults(t *testing.T) {
|
||||||
d := BuiltinFamilyDefaults()
|
d := BuiltinFamilyDefaults()
|
||||||
if !d.WantV6 || d.WantV4 {
|
if !d.WantV6 || !d.WantV4 {
|
||||||
t.Fatalf("built-in defaults wrong: v6=%v v4=%v (want true/false)", d.WantV6, d.WantV4)
|
t.Fatalf("built-in defaults wrong: v6=%v v4=%v (want dual-stack true/true)", d.WantV6, d.WantV4)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -37,14 +37,16 @@ func TestFamilyDefaultsFromNodeConfig_PartialOverride(t *testing.T) {
|
|||||||
nc := &flockv1alpha1.NodeConfig{
|
nc := &flockv1alpha1.NodeConfig{
|
||||||
Spec: flockv1alpha1.NodeConfigSpec{
|
Spec: flockv1alpha1.NodeConfigSpec{
|
||||||
Defaults: &flockv1alpha1.FamilyDefaults{
|
Defaults: &flockv1alpha1.FamilyDefaults{
|
||||||
IPv4: boolPtr(true),
|
IPv4: boolPtr(false),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
d := FamilyDefaultsFromNodeConfig(nc)
|
d := FamilyDefaultsFromNodeConfig(nc)
|
||||||
// IPv6 was unset → keeps built-in true; IPv4 was set → flipped on.
|
// IPv6 unset → keeps built-in true; IPv4 explicitly set to false →
|
||||||
if !d.WantV6 || !d.WantV4 {
|
// node opts the family off. Validates that an explicit false beats
|
||||||
t.Fatalf("partial override wrong: %+v (want v6=true, v4=true)", d)
|
// the dual-stack baseline rather than being silently overridden.
|
||||||
|
if !d.WantV6 || d.WantV4 {
|
||||||
|
t.Fatalf("partial override wrong: %+v (want v6=true, v4=false)", d)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,12 +66,27 @@ func TestFamilyDefaultsFromNodeConfig_FullOverride(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_BuiltinDefaults(t *testing.T) {
|
func TestParseAnnotations_BuiltinDefaults(t *testing.T) {
|
||||||
|
// Built-in baseline is dual-stack — no annotation needed.
|
||||||
a, err := ParseAnnotations(nil, BuiltinFamilyDefaults())
|
a, err := ParseAnnotations(nil, BuiltinFamilyDefaults())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
if !a.WantV6 || !a.WantV4 {
|
||||||
|
t.Fatalf("expected dual-stack default, got v6=%v v4=%v", a.WantV6, a.WantV4)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestParseAnnotations_OptOutV4 — pods that want IPv6 only must opt out
|
||||||
|
// explicitly via the ipv4 annotation now that the built-in is dual-stack.
|
||||||
|
func TestParseAnnotations_OptOutV4(t *testing.T) {
|
||||||
|
a, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv4": "false",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
if !a.WantV6 || a.WantV4 {
|
if !a.WantV6 || a.WantV4 {
|
||||||
t.Fatalf("defaults wrong: v6=%v v4=%v", a.WantV6, a.WantV4)
|
t.Fatalf("ipv4=false override failed: v6=%v v4=%v", a.WantV6, a.WantV4)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,6 +117,8 @@ func TestParseAnnotations_AnnotationOverridesNodeDefault(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_DualStackViaAnnotation(t *testing.T) {
|
func TestParseAnnotations_DualStackViaAnnotation(t *testing.T) {
|
||||||
|
// Same as built-in default; explicit ipv4=true is a no-op now but must
|
||||||
|
// still parse cleanly.
|
||||||
a, err := ParseAnnotations(map[string]string{
|
a, err := ParseAnnotations(map[string]string{
|
||||||
annotationPrefix + "ipv4": "true",
|
annotationPrefix + "ipv4": "true",
|
||||||
}, BuiltinFamilyDefaults())
|
}, BuiltinFamilyDefaults())
|
||||||
@@ -112,10 +131,12 @@ func TestParseAnnotations_DualStackViaAnnotation(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_NoFamily(t *testing.T) {
|
func TestParseAnnotations_NoFamily(t *testing.T) {
|
||||||
|
// Pod opts out of both families → must be rejected.
|
||||||
if _, err := ParseAnnotations(map[string]string{
|
if _, err := ParseAnnotations(map[string]string{
|
||||||
annotationPrefix + "ipv6": "false",
|
annotationPrefix + "ipv6": "false",
|
||||||
|
annotationPrefix + "ipv4": "false",
|
||||||
}, BuiltinFamilyDefaults()); err == nil {
|
}, BuiltinFamilyDefaults()); err == nil {
|
||||||
t.Fatalf("expected error: ipv6=false ipv4=false")
|
t.Fatalf("expected error when pod opts out of both families")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,32 +174,105 @@ func TestParseAnnotations_BoolCaseInsensitive(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_IPAlgo(t *testing.T) {
|
// ResolveIPAlgo: precedence is pod → node → nil. Empty / missing / invalid
|
||||||
a, err := ParseAnnotations(map[string]string{
|
// at any level falls through to the next under the relaxed user-defined rule
|
||||||
annotationPrefix + "ip-algo": "namespace,pod,image",
|
// "all three mean unset".
|
||||||
}, BuiltinFamilyDefaults())
|
|
||||||
if err != nil {
|
func TestResolveIPAlgo_PodWins(t *testing.T) {
|
||||||
t.Fatal(err)
|
pod := map[string]string{annotationPrefix + annIPAlgo: "namespace,app"}
|
||||||
|
node := map[string]string{annotationPrefix + annIPAlgo: "image"}
|
||||||
|
got := ResolveIPAlgo(pod, node, nil)
|
||||||
|
want := []embed.Field{embed.FieldNamespace, embed.FieldApp}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
want := []embed.Field{embed.FieldNamespace, embed.FieldPod, embed.FieldImage}
|
}
|
||||||
if len(a.IPAlgo) != len(want) {
|
|
||||||
t.Fatalf("ip-algo len=%d, want %d", len(a.IPAlgo), len(want))
|
func TestResolveIPAlgo_PodAbsentFallsToNode(t *testing.T) {
|
||||||
|
node := map[string]string{annotationPrefix + annIPAlgo: "image"}
|
||||||
|
got := ResolveIPAlgo(nil, node, nil)
|
||||||
|
want := []embed.Field{embed.FieldImage}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
for i := range want {
|
}
|
||||||
if a.IPAlgo[i] != want[i] {
|
|
||||||
t.Fatalf("ip-algo[%d]=%s, want %s", i, a.IPAlgo[i], want[i])
|
func TestResolveIPAlgo_PodEmptyFallsToNode(t *testing.T) {
|
||||||
|
pod := map[string]string{annotationPrefix + annIPAlgo: ""}
|
||||||
|
node := map[string]string{annotationPrefix + annIPAlgo: "image"}
|
||||||
|
got := ResolveIPAlgo(pod, node, nil)
|
||||||
|
want := []embed.Field{embed.FieldImage}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveIPAlgo_PodInvalidFallsToNode(t *testing.T) {
|
||||||
|
for _, podVal := range []string{"namespace,bogus", "ns", ",", "namespace,namespace"} {
|
||||||
|
pod := map[string]string{annotationPrefix + annIPAlgo: podVal}
|
||||||
|
node := map[string]string{annotationPrefix + annIPAlgo: "app"}
|
||||||
|
got := ResolveIPAlgo(pod, node, nil)
|
||||||
|
want := []embed.Field{embed.FieldApp}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("podVal=%q: got %v, want %v", podVal, got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_IPAlgo_Unknown(t *testing.T) {
|
func TestResolveIPAlgo_BothInvalidReturnsNil(t *testing.T) {
|
||||||
if _, err := ParseAnnotations(map[string]string{
|
pod := map[string]string{annotationPrefix + annIPAlgo: "bogus"}
|
||||||
annotationPrefix + "ip-algo": "namespace,foo",
|
node := map[string]string{annotationPrefix + annIPAlgo: "also-bogus"}
|
||||||
}, BuiltinFamilyDefaults()); err == nil {
|
if got := ResolveIPAlgo(pod, node, nil); got != nil {
|
||||||
t.Fatalf("expected unknown-field error")
|
t.Fatalf("got %v, want nil", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestResolveIPAlgo_BothAbsentReturnsNil(t *testing.T) {
|
||||||
|
if got := ResolveIPAlgo(nil, nil, nil); got != nil {
|
||||||
|
t.Fatalf("got %v, want nil", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveIPAlgo_NilNodeMap(t *testing.T) {
|
||||||
|
pod := map[string]string{annotationPrefix + annIPAlgo: "image"}
|
||||||
|
got := ResolveIPAlgo(pod, nil, nil)
|
||||||
|
want := []embed.Field{embed.FieldImage}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveIPAlgo_Whitespace(t *testing.T) {
|
||||||
|
pod := map[string]string{annotationPrefix + annIPAlgo: " namespace , app "}
|
||||||
|
got := ResolveIPAlgo(pod, nil, nil)
|
||||||
|
want := []embed.Field{embed.FieldNamespace, embed.FieldApp}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveIPAlgo_DuplicateInvalidates(t *testing.T) {
|
||||||
|
pod := map[string]string{annotationPrefix + annIPAlgo: "app,app"}
|
||||||
|
node := map[string]string{annotationPrefix + annIPAlgo: "namespace"}
|
||||||
|
got := ResolveIPAlgo(pod, node, nil)
|
||||||
|
want := []embed.Field{embed.FieldNamespace}
|
||||||
|
if !equalFields(got, want) {
|
||||||
|
t.Fatalf("got %v, want %v (duplicate must collapse to invalid)", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func equalFields(a, b []embed.Field) bool {
|
||||||
|
if len(a) != len(b) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range a {
|
||||||
|
if a[i] != b[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseAnnotations_CIDR(t *testing.T) {
|
func TestParseAnnotations_CIDR(t *testing.T) {
|
||||||
a, err := ParseAnnotations(map[string]string{
|
a, err := ParseAnnotations(map[string]string{
|
||||||
annotationPrefix + "cidr6": "2602:817:3000:f001::/64, 2602:817:3000:f002::/64",
|
annotationPrefix + "cidr6": "2602:817:3000:f001::/64, 2602:817:3000:f002::/64",
|
||||||
@@ -219,6 +313,97 @@ func TestParseAnnotations_Anycast_Mixed(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Addresses_Mixed(t *testing.T) {
|
||||||
|
// Plex's case: one v6 and one v4 supplied via addresses, both families
|
||||||
|
// enabled (built-in defaults). Both IPs are recorded; conflict check
|
||||||
|
// passes; later in handlers.Add they get peeled into primary slots.
|
||||||
|
a, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "addresses": "2602:817:3000:c606::166, 142.202.202.166",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if len(a.Addresses) != 2 {
|
||||||
|
t.Fatalf("addresses len=%d", len(a.Addresses))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Addresses_ConflictV4Disabled(t *testing.T) {
|
||||||
|
// addresses contains a v4 but the pod has explicitly opted out of v4.
|
||||||
|
// The IP would land on eth0 with no default v4 route, so reject at ADD.
|
||||||
|
_, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv4": "false",
|
||||||
|
annotationPrefix + "addresses": "142.202.202.166",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("want error for ipv4=false + addresses v4, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Addresses_ConflictV6Disabled(t *testing.T) {
|
||||||
|
_, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv6": "false",
|
||||||
|
annotationPrefix + "ipv4": "true",
|
||||||
|
annotationPrefix + "addresses": "2602:817:3000:c606::166",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("want error for ipv6=false + addresses v6, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Anycast_ConflictV4Disabled(t *testing.T) {
|
||||||
|
// Anycast on lo also requires the family enabled — replies need the
|
||||||
|
// in-pod default v4 route off eth0, which only exists when v4 is on.
|
||||||
|
_, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv4": "false",
|
||||||
|
annotationPrefix + "anycast": "172.25.255.1",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("want error for ipv4=false + anycast v4, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Anycast_ConflictV6Disabled(t *testing.T) {
|
||||||
|
_, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv6": "false",
|
||||||
|
annotationPrefix + "ipv4": "true",
|
||||||
|
annotationPrefix + "anycast": "2602:817:3000:ac::1",
|
||||||
|
}, BuiltinFamilyDefaults())
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("want error for ipv6=false + anycast v6, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Addresses_NodeDefaultV4Off(t *testing.T) {
|
||||||
|
// NodeConfig default opts v4 off for the node, and the pod has no
|
||||||
|
// explicit ipv4 annotation. addresses-v4 still conflicts because the
|
||||||
|
// resolved WantV4 is false. Operator must add `ipv4: "true"` on the
|
||||||
|
// pod to override the node default.
|
||||||
|
defaults := FamilyDefaults{WantV6: true, WantV4: false}
|
||||||
|
_, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "addresses": "142.202.202.166",
|
||||||
|
}, defaults)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("want error for NodeConfig v4=false + addresses v4, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAnnotations_Addresses_NodeDefaultV4Off_PodOptsBackIn(t *testing.T) {
|
||||||
|
// Same as above but pod explicitly sets ipv4=true to override the node
|
||||||
|
// default. Conflict resolved; parse succeeds.
|
||||||
|
defaults := FamilyDefaults{WantV6: true, WantV4: false}
|
||||||
|
a, err := ParseAnnotations(map[string]string{
|
||||||
|
annotationPrefix + "ipv4": "true",
|
||||||
|
annotationPrefix + "addresses": "142.202.202.166",
|
||||||
|
}, defaults)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected ok, got %v", err)
|
||||||
|
}
|
||||||
|
if !a.WantV4 || len(a.Addresses) != 1 {
|
||||||
|
t.Fatalf("unexpected: %+v", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseCNIArgs(t *testing.T) {
|
func TestParseCNIArgs(t *testing.T) {
|
||||||
args := ParseCNIArgs("IgnoreUnknown=1;K8S_POD_NAMESPACE=mail;K8S_POD_NAME=stalwart-0;K8S_POD_INFRA_CONTAINER_ID=abc123")
|
args := ParseCNIArgs("IgnoreUnknown=1;K8S_POD_NAMESPACE=mail;K8S_POD_NAME=stalwart-0;K8S_POD_INFRA_CONTAINER_ID=abc123")
|
||||||
if args.PodNamespace != "mail" || args.PodName != "stalwart-0" || args.InfraID != "abc123" {
|
if args.PodNamespace != "mail" || args.PodName != "stalwart-0" || args.InfraID != "abc123" {
|
||||||
|
|||||||
@@ -0,0 +1,112 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
|
// anycastNexthop is a single candidate nexthop for a multipath kernel route:
// a host-side veth paired with the pod's eth0 IP.
type anycastNexthop struct {
	hostIface string // host-side veth interface name
	via       net.IP // pod eth0 address used as the route's gateway
}

// anycastTarget is the kernel-route shape for one advertised anycast IP.
// Every Ready pod on this node that binds the IP contributes one nexthop, so
// with more than one entry the kernel does per-flow ECMP across them.
//
// nexthops is kept sorted by canonical(via). The AnycastReconciler skips
// kernel writes when old and new targets compare equal, and that comparison
// is positional, so the order must be stable across reconcile passes.
type anycastTarget struct {
	nexthops []anycastNexthop
}

// equal reports whether t and o describe the same kernel route. The
// comparison is positional, so both sides must already be sorted (the
// canonical constructor sorts).
func (t anycastTarget) equal(o anycastTarget) bool {
	if len(t.nexthops) != len(o.nexthops) {
		return false
	}
	for i, mine := range t.nexthops {
		theirs := o.nexthops[i]
		if mine.hostIface != theirs.hostIface || !mine.via.Equal(theirs.via) {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// resolveAnycastTargets walks the committed allocation set and returns the
|
||||||
|
// desired kernel-route shape for every anycast IP that has at least one
|
||||||
|
// Ready local pod binding it. Multiple Ready pods sharing the same anycast
|
||||||
|
// IP collapse into a single multi-nexthop target so the kernel can
|
||||||
|
// per-flow ECMP across them.
|
||||||
|
//
|
||||||
|
// Pure: no kernel calls, no informer access. Pods are surfaced via the
|
||||||
|
// isReady callback so the reconciler can plug in its informer; tests can
|
||||||
|
// pass any function that satisfies the signature.
|
||||||
|
//
|
||||||
|
// warn is invoked for human-facing skip reasons (e.g. anycast with no
|
||||||
|
// unicast of same family). nil-safe — pass nil to silently drop.
|
||||||
|
func resolveAnycastTargets(
|
||||||
|
allocations []Allocation,
|
||||||
|
isReady func(namespace, name string) bool,
|
||||||
|
warn func(string),
|
||||||
|
) map[string]anycastTarget {
|
||||||
|
if warn == nil {
|
||||||
|
warn = func(string) {}
|
||||||
|
}
|
||||||
|
out := map[string]anycastTarget{}
|
||||||
|
for _, a := range allocations {
|
||||||
|
if a.State != StateCommitted || (len(a.Anycast) == 0 && len(a.Addresses) == 0) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !isReady(a.Namespace, a.PodName) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
host := HostIfaceName(a.ContainerID)
|
||||||
|
via6 := net.ParseIP(a.IP6)
|
||||||
|
via4 := net.ParseIP(a.IP4)
|
||||||
|
// Anycast (lo-bound) and Addresses (eth0-bound) are advertised
|
||||||
|
// identically: /128 or /32 host route on the host, BGP via BIRD.
|
||||||
|
for _, ipStr := range append(a.Anycast, a.Addresses...) {
|
||||||
|
ip := net.ParseIP(ipStr)
|
||||||
|
if ip == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var via net.IP
|
||||||
|
if ip.To4() != nil {
|
||||||
|
via = via4
|
||||||
|
} else {
|
||||||
|
via = via6
|
||||||
|
}
|
||||||
|
if via == nil {
|
||||||
|
warn("anycast " + ipStr + " skipped: pod " +
|
||||||
|
a.Namespace + "/" + a.PodName +
|
||||||
|
" has no unicast of same family")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key := canonical(ip)
|
||||||
|
t := out[key]
|
||||||
|
t.nexthops = append(t.nexthops, anycastNexthop{hostIface: host, via: via})
|
||||||
|
out[key] = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Sort each target's nexthops for stable comparison + stable kernel
|
||||||
|
// ordering. Sort key is canonical(via) — sufficient for stability
|
||||||
|
// because (host, via) pairs are 1:1 (one veth per pod, one v6+v4 per
|
||||||
|
// pod, so via uniquely identifies the nexthop).
|
||||||
|
for k, t := range out {
|
||||||
|
sort.Slice(t.nexthops, func(i, j int) bool {
|
||||||
|
return canonical(t.nexthops[i].via) < canonical(t.nexthops[j].via)
|
||||||
|
})
|
||||||
|
out[k] = t
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
+127
-76
@@ -26,6 +26,11 @@ import (
|
|||||||
// - Pod transitions to Ready=False or DELETE → remove kernel route, remove
|
// - Pod transitions to Ready=False or DELETE → remove kernel route, remove
|
||||||
// from BIRD export.
|
// from BIRD export.
|
||||||
//
|
//
|
||||||
|
// When more than one Ready pod on this node binds the same anycast IP, the
|
||||||
|
// kernel route uses RTA_MULTIPATH so the kernel does per-flow ECMP across
|
||||||
|
// the contributing pods. This is the within-node companion to BGP-level
|
||||||
|
// ECMP across nodes.
|
||||||
|
//
|
||||||
// Reconcile is idempotent. Triggers: AfterCommit hook, Pod informer
|
// Reconcile is idempotent. Triggers: AfterCommit hook, Pod informer
|
||||||
// UpdateFunc on Ready transitions, periodic 2s tick.
|
// UpdateFunc on Ready transitions, periodic 2s tick.
|
||||||
type AnycastReconciler struct {
|
type AnycastReconciler struct {
|
||||||
@@ -42,13 +47,6 @@ type AnycastReconciler struct {
|
|||||||
trigger chan struct{}
|
trigger chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// anycastTarget describes the kernel route shape for one advertised
|
|
||||||
// anycast IP: which veth, and which pod eth0 IP to use as next-hop.
|
|
||||||
type anycastTarget struct {
|
|
||||||
hostIface string
|
|
||||||
via net.IP
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewAnycastReconciler returns a Reconciler ready to Run.
|
// NewAnycastReconciler returns a Reconciler ready to Run.
|
||||||
func NewAnycastReconciler(node string, store *Store, pods *PodCache, nc *NodeConfigCache, bird *BirdManager, routerID string, logger *slog.Logger) *AnycastReconciler {
|
func NewAnycastReconciler(node string, store *Store, pods *PodCache, nc *NodeConfigCache, bird *BirdManager, routerID string, logger *slog.Logger) *AnycastReconciler {
|
||||||
return &AnycastReconciler{
|
return &AnycastReconciler{
|
||||||
@@ -96,25 +94,26 @@ func (r *AnycastReconciler) reconcile() {
|
|||||||
|
|
||||||
desired := r.computeDesired()
|
desired := r.computeDesired()
|
||||||
|
|
||||||
// Install routes that should exist but don't (or whose target changed).
|
// Install routes that should exist but don't, or whose nexthop set
|
||||||
|
// changed.
|
||||||
for ip, t := range desired {
|
for ip, t := range desired {
|
||||||
if cur, ok := r.advertised[ip]; ok && cur.hostIface == t.hostIface && cur.via.Equal(t.via) {
|
if cur, ok := r.advertised[ip]; ok && cur.equal(t) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := installAnycastRoute(ip, t); err != nil {
|
if err := installAnycastRoute(ip, t); err != nil {
|
||||||
r.Logger.Warn("anycast install", "ip", ip, "host", t.hostIface, "via", t.via, "err", err)
|
r.Logger.Warn("anycast install", "ip", ip, "nexthops", len(t.nexthops), "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
r.Logger.Info("anycast advertise", "ip", ip, "host", t.hostIface, "via", t.via)
|
r.Logger.Info("anycast advertise", "ip", ip, "nexthops", describeNexthops(t))
|
||||||
r.advertised[ip] = t
|
r.advertised[ip] = t
|
||||||
}
|
}
|
||||||
// Remove routes that exist but shouldn't.
|
// Remove routes that exist but shouldn't.
|
||||||
for ip, t := range r.advertised {
|
for ip, t := range r.advertised {
|
||||||
if _, want := desired[ip]; !want {
|
if _, want := desired[ip]; !want {
|
||||||
if err := removeAnycastRoute(ip, t); err != nil {
|
if err := removeAnycastRoute(ip, t); err != nil {
|
||||||
r.Logger.Warn("anycast remove", "ip", ip, "host", t.hostIface, "err", err)
|
r.Logger.Warn("anycast remove", "ip", ip, "err", err)
|
||||||
} else {
|
} else {
|
||||||
r.Logger.Info("anycast withdraw", "ip", ip, "host", t.hostIface)
|
r.Logger.Info("anycast withdraw", "ip", ip)
|
||||||
}
|
}
|
||||||
delete(r.advertised, ip)
|
delete(r.advertised, ip)
|
||||||
}
|
}
|
||||||
@@ -124,44 +123,17 @@ func (r *AnycastReconciler) reconcile() {
|
|||||||
r.renderBird(desired)
|
r.renderBird(desired)
|
||||||
}
|
}
|
||||||
|
|
||||||
// computeDesired walks the Store and returns the per-ip anycastTarget for
|
// computeDesired delegates to the pure resolveAnycastTargets and plugs in
|
||||||
// every anycast advertisement that should be active right now. Each target
|
// the live informer-based isReady callback.
|
||||||
// uses the pod's own eth0 IP (same family) as the route's `via` next-hop —
|
|
||||||
// that way kernel NDP/ARP resolves the eth0 address, which IS configured
|
|
||||||
// on the pod's eth0, so the pod responds normally without proxy_ndp.
|
|
||||||
func (r *AnycastReconciler) computeDesired() map[string]anycastTarget {
|
func (r *AnycastReconciler) computeDesired() map[string]anycastTarget {
|
||||||
out := map[string]anycastTarget{}
|
return resolveAnycastTargets(
|
||||||
for _, a := range r.Store.Snapshot() {
|
r.Store.Snapshot(),
|
||||||
if a.State != StateCommitted || len(a.Anycast) == 0 {
|
func(ns, name string) bool {
|
||||||
continue
|
pod, ok := r.Pods.Get(ns, name)
|
||||||
}
|
return ok && podAnycastEligible(pod)
|
||||||
pod, ok := r.Pods.Get(a.Namespace, a.PodName)
|
},
|
||||||
if !ok || !podReady(pod) {
|
func(s string) { r.Logger.Warn(s) },
|
||||||
continue
|
)
|
||||||
}
|
|
||||||
host := HostIfaceName(a.ContainerID)
|
|
||||||
via6 := net.ParseIP(a.IP6)
|
|
||||||
via4 := net.ParseIP(a.IP4)
|
|
||||||
for _, ipStr := range a.Anycast {
|
|
||||||
ip := net.ParseIP(ipStr)
|
|
||||||
if ip == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
var via net.IP
|
|
||||||
if ip.To4() != nil {
|
|
||||||
via = via4
|
|
||||||
} else {
|
|
||||||
via = via6
|
|
||||||
}
|
|
||||||
if via == nil {
|
|
||||||
r.Logger.Warn("anycast skipped: pod has no unicast IP of same family",
|
|
||||||
"pod", a.Namespace+"/"+a.PodName, "anycast", ipStr)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
out[canonical(ip)] = anycastTarget{hostIface: host, via: via}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *AnycastReconciler) renderBird(desired map[string]anycastTarget) {
|
func (r *AnycastReconciler) renderBird(desired map[string]anycastTarget) {
|
||||||
@@ -170,72 +142,139 @@ func (r *AnycastReconciler) renderBird(desired map[string]anycastTarget) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
var v6, v4 []string
|
var v6, v4 []string
|
||||||
for ipStr := range desired {
|
seen := map[string]struct{}{}
|
||||||
ip := net.ParseIP(ipStr)
|
add := func(ip net.IP) {
|
||||||
if ip == nil {
|
key := canonical(ip)
|
||||||
continue
|
if _, dup := seen[key]; dup {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
if ip.To4() != nil {
|
if ip.To4() != nil {
|
||||||
v4 = append(v4, ip.To4().String())
|
v4 = append(v4, ip.To4().String())
|
||||||
} else {
|
} else {
|
||||||
v6 = append(v6, ip.To16().String())
|
v6 = append(v6, ip.To16().String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for ipStr := range desired {
|
||||||
|
if ip := net.ParseIP(ipStr); ip != nil {
|
||||||
|
add(ip)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// A pod IP that lives outside the node's BGP aggregate (e.g. an
|
||||||
|
// addresses-annotation IP promoted to be the pod's primary v4 — Plex's
|
||||||
|
// 142.202.202.166 against host004's 172.25.214.0/24) is not naturally
|
||||||
|
// covered by the aggregate, so it must be advertised individually as a
|
||||||
|
// /32 or /128. Anycast and addresses extras are already covered by the
|
||||||
|
// `desired` loop above; this sweep is for promoted-primary IPs which do
|
||||||
|
// not flow through the AnycastReconciler.
|
||||||
|
nodeV6, nodeV4 := parseNodeCIDRs(nc)
|
||||||
|
for _, a := range r.Store.Snapshot() {
|
||||||
|
if a.State != StateCommitted {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if ip := net.ParseIP(a.IP6); ip != nil && !ipInAny(ip, nodeV6) {
|
||||||
|
add(ip)
|
||||||
|
}
|
||||||
|
if ip := net.ParseIP(a.IP4); ip != nil && !ipInAny(ip, nodeV4) {
|
||||||
|
add(ip)
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := r.Bird.Render(nc, v6, v4, r.RouterID); err != nil {
|
if err := r.Bird.Render(nc, v6, v4, r.RouterID); err != nil {
|
||||||
r.Logger.Warn("anycast bird render", "err", err)
|
r.Logger.Warn("anycast bird render", "err", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// installAnycastRoute installs `<ipStr>/<128|32> via t.via dev t.hostIface`.
|
// parseNodeCIDRs parses NodeConfig.Spec.CIDR6/4 strings into IPNets,
|
||||||
|
// silently dropping malformed entries (admission-time validation should
|
||||||
|
// have rejected them long before this point).
|
||||||
|
func parseNodeCIDRs(nc *flockv1alpha1.NodeConfig) (v6, v4 []*net.IPNet) {
|
||||||
|
for _, s := range nc.Spec.CIDR6 {
|
||||||
|
if _, n, err := net.ParseCIDR(s); err == nil {
|
||||||
|
v6 = append(v6, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range nc.Spec.CIDR4 {
|
||||||
|
if _, n, err := net.ParseCIDR(s); err == nil {
|
||||||
|
v4 = append(v4, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// ipInAny reports whether ip is contained in at least one of nets.
// An empty or nil nets slice yields false.
func ipInAny(ip net.IP, nets []*net.IPNet) bool {
	for i := 0; i < len(nets); i++ {
		if nets[i].Contains(ip) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// installAnycastRoute installs `<ipStr>/<128|32>` pointing at the
|
||||||
|
// nexthop set in t. With one nexthop the route is a plain via-route;
|
||||||
|
// with multiple, it's a multipath route using RTA_MULTIPATH so the
|
||||||
|
// kernel hashes flows across the constituent pods.
|
||||||
|
//
|
||||||
// Idempotent — RouteReplace overwrites a stale entry.
|
// Idempotent — RouteReplace overwrites a stale entry.
|
||||||
func installAnycastRoute(ipStr string, t anycastTarget) error {
|
func installAnycastRoute(ipStr string, t anycastTarget) error {
|
||||||
ip := net.ParseIP(ipStr)
|
ip := net.ParseIP(ipStr)
|
||||||
if ip == nil {
|
if ip == nil {
|
||||||
return fmt.Errorf("bad ip %q", ipStr)
|
return fmt.Errorf("bad ip %q", ipStr)
|
||||||
}
|
}
|
||||||
link, err := netlink.LinkByName(t.hostIface)
|
if len(t.nexthops) == 0 {
|
||||||
if err != nil {
|
return fmt.Errorf("anycast %s: no nexthops", ipStr)
|
||||||
return fmt.Errorf("lookup %s: %w", t.hostIface, err)
|
|
||||||
}
|
}
|
||||||
prefix := 128
|
prefix := 128
|
||||||
if ip.To4() != nil {
|
if ip.To4() != nil {
|
||||||
prefix = 32
|
prefix = 32
|
||||||
ip = ip.To4()
|
ip = ip.To4()
|
||||||
}
|
}
|
||||||
r := &netlink.Route{
|
r := &netlink.Route{Dst: cidrFor(ip, prefix)}
|
||||||
|
if len(t.nexthops) == 1 {
|
||||||
|
// Single nexthop — keep the route shape identical to today's
|
||||||
|
// production form. Functionally equivalent to a 1-element
|
||||||
|
// MultiPath but `ip route show` renders nicer for operators.
|
||||||
|
nh := t.nexthops[0]
|
||||||
|
link, err := netlink.LinkByName(nh.hostIface)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("lookup %s: %w", nh.hostIface, err)
|
||||||
|
}
|
||||||
|
r.LinkIndex = link.Attrs().Index
|
||||||
|
r.Gw = nh.via
|
||||||
|
} else {
|
||||||
|
hops := make([]*netlink.NexthopInfo, 0, len(t.nexthops))
|
||||||
|
for _, nh := range t.nexthops {
|
||||||
|
link, err := netlink.LinkByName(nh.hostIface)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("lookup %s: %w", nh.hostIface, err)
|
||||||
|
}
|
||||||
|
hops = append(hops, &netlink.NexthopInfo{
|
||||||
LinkIndex: link.Attrs().Index,
|
LinkIndex: link.Attrs().Index,
|
||||||
Dst: cidrFor(ip, prefix),
|
Gw: nh.via,
|
||||||
Gw: t.via,
|
Hops: 0,
|
||||||
// SCOPE_UNIVERSE — the gateway is on a different "logical" subnet
|
})
|
||||||
// than the local /128 route, but reachable on this veth. Linux is
|
}
|
||||||
// happy as long as the veth has IPv6 forwarding on (it does — set
|
r.MultiPath = hops
|
||||||
// in configureHostSide) and the pod's eth0 has the via address
|
|
||||||
// (also true — that's the pod's IP6/IP4 we allocated).
|
|
||||||
}
|
}
|
||||||
return netlink.RouteReplace(r)
|
return netlink.RouteReplace(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
// removeAnycastRoute deletes the host route. Missing routes / interfaces
|
// removeAnycastRoute deletes the host route. Missing routes / interfaces
|
||||||
// are treated as success — DEL paths can race with veth teardown.
|
// are treated as success — DEL paths can race with veth teardown.
|
||||||
func removeAnycastRoute(ipStr string, t anycastTarget) error {
|
//
|
||||||
|
// Kernel route deletion matches by destination prefix; we don't need to
|
||||||
|
// re-specify the nexthop set.
|
||||||
|
func removeAnycastRoute(ipStr string, _ anycastTarget) error {
|
||||||
ip := net.ParseIP(ipStr)
|
ip := net.ParseIP(ipStr)
|
||||||
if ip == nil {
|
if ip == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
link, err := netlink.LinkByName(t.hostIface)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
prefix := 128
|
prefix := 128
|
||||||
if ip.To4() != nil {
|
if ip.To4() != nil {
|
||||||
prefix = 32
|
prefix = 32
|
||||||
ip = ip.To4()
|
ip = ip.To4()
|
||||||
}
|
}
|
||||||
r := &netlink.Route{
|
r := &netlink.Route{Dst: cidrFor(ip, prefix)}
|
||||||
LinkIndex: link.Attrs().Index,
|
|
||||||
Dst: cidrFor(ip, prefix),
|
|
||||||
Gw: t.via,
|
|
||||||
}
|
|
||||||
if err := netlink.RouteDel(r); err != nil {
|
if err := netlink.RouteDel(r); err != nil {
|
||||||
// ESRCH ("no such process") is netlink-speak for "no such route";
|
// ESRCH ("no such process") is netlink-speak for "no such route";
|
||||||
// treat as success.
|
// treat as success.
|
||||||
@@ -247,5 +286,17 @@ func removeAnycastRoute(ipStr string, t anycastTarget) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// describeNexthops returns a compact string for log messages.
|
||||||
|
func describeNexthops(t anycastTarget) string {
|
||||||
|
var s string
|
||||||
|
for i, nh := range t.nexthops {
|
||||||
|
if i > 0 {
|
||||||
|
s += ","
|
||||||
|
}
|
||||||
|
s += nh.hostIface + "→" + nh.via.String()
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
// _ = flockv1alpha1 to silence unused import warnings on minimal builds.
|
// _ = flockv1alpha1 to silence unused import warnings on minimal builds.
|
||||||
var _ = flockv1alpha1.GroupName
|
var _ = flockv1alpha1.GroupName
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// allReady is an isReady callback for tests that reports every pod as
// ready, whatever namespace and name it is asked about.
func allReady(string, string) bool {
	return true
}
|
||||||
|
|
||||||
|
// readyOnly builds an isReady callback for tests that reports a pod as
// ready only when its name is one of want. The namespace argument of the
// returned callback is ignored. Called with no names, the callback
// reports every pod as not ready.
func readyOnly(want ...string) func(string, string) bool {
	ready := map[string]struct{}{}
	for i := range want {
		ready[want[i]] = struct{}{}
	}
	return func(_, podName string) bool {
		_, found := ready[podName]
		return found
	}
}
|
||||||
|
|
||||||
|
func TestResolveAnycastTargets_OnePodOneAnycast(t *testing.T) {
|
||||||
|
allocs := []Allocation{{
|
||||||
|
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted,
|
||||||
|
IP6: "2001:db8::1",
|
||||||
|
Anycast: []string{"2001:db8:a::1"},
|
||||||
|
}}
|
||||||
|
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 anycast IP, got %d", len(out))
|
||||||
|
}
|
||||||
|
tgt, ok := out["2001:db8:a::1"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("missing target")
|
||||||
|
}
|
||||||
|
if len(tgt.nexthops) != 1 {
|
||||||
|
t.Fatalf("expected 1 nexthop, got %d", len(tgt.nexthops))
|
||||||
|
}
|
||||||
|
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||||
|
t.Fatalf("nexthop via wrong: %v", tgt.nexthops[0].via)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Two pods on the same node binding the same anycast IP must produce a
|
||||||
|
// SINGLE target with TWO nexthops. The previous behaviour (overwriting)
|
||||||
|
// was the bug this whole change exists to fix.
|
||||||
|
func TestResolveAnycastTargets_TwoPodsSameAnycast_MultiNexthop(t *testing.T) {
|
||||||
|
allocs := []Allocation{
|
||||||
|
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::2",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
{ContainerID: "c2", Namespace: "ns", PodName: "pod-b",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::1",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
}
|
||||||
|
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
tgt := out["2001:db8:a::1"]
|
||||||
|
if len(tgt.nexthops) != 2 {
|
||||||
|
t.Fatalf("expected 2 nexthops, got %d", len(tgt.nexthops))
|
||||||
|
}
|
||||||
|
// Order should be sorted by canonical(via) — ::1 before ::2.
|
||||||
|
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||||
|
t.Fatalf("nexthops not sorted by via; got %v first", tgt.nexthops[0].via)
|
||||||
|
}
|
||||||
|
if !tgt.nexthops[1].via.Equal(net.ParseIP("2001:db8::2")) {
|
||||||
|
t.Fatalf("nexthops not sorted by via; got %v second", tgt.nexthops[1].via)
|
||||||
|
}
|
||||||
|
// HostIface differs per pod (different containerID → different FNV).
|
||||||
|
if tgt.nexthops[0].hostIface == tgt.nexthops[1].hostIface {
|
||||||
|
t.Fatalf("expected distinct hostIfaces, both %q", tgt.nexthops[0].hostIface)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// When one of the contributing pods goes NotReady, only the remaining
|
||||||
|
// Ready pod should appear in the target's nexthop set.
|
||||||
|
func TestResolveAnycastTargets_NotReadyDropped(t *testing.T) {
|
||||||
|
allocs := []Allocation{
|
||||||
|
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::1",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
{ContainerID: "c2", Namespace: "ns", PodName: "pod-b",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::2",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
}
|
||||||
|
out := resolveAnycastTargets(allocs, readyOnly("pod-a"), nil)
|
||||||
|
tgt := out["2001:db8:a::1"]
|
||||||
|
if len(tgt.nexthops) != 1 {
|
||||||
|
t.Fatalf("expected 1 nexthop after NotReady drop, got %d", len(tgt.nexthops))
|
||||||
|
}
|
||||||
|
if !tgt.nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||||
|
t.Fatalf("wrong surviving nexthop: %v", tgt.nexthops[0].via)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pods that haven't reached Ready are excluded entirely from the target
|
||||||
|
// set. If no pod is Ready for an anycast IP, that IP is absent from the
|
||||||
|
// output (BIRD will withdraw from BGP, kernel route will be removed).
|
||||||
|
func TestResolveAnycastTargets_NoReadyPodsOmitsIP(t *testing.T) {
|
||||||
|
allocs := []Allocation{
|
||||||
|
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::1",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
}
|
||||||
|
out := resolveAnycastTargets(allocs, readyOnly( /* none */ ), nil)
|
||||||
|
if _, ok := out["2001:db8:a::1"]; ok {
|
||||||
|
t.Fatalf("anycast should be absent when no pod ready")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pending allocations (CNI ADD partway through) are skipped even if the
|
||||||
|
// pod is Ready — we don't program kernel routes for partial setups.
|
||||||
|
func TestResolveAnycastTargets_PendingSkipped(t *testing.T) {
|
||||||
|
allocs := []Allocation{
|
||||||
|
{ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StatePending, IP6: "2001:db8::1",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
}
|
||||||
|
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatalf("pending allocations must be skipped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mixed v6+v4 anycast on the same pod produces two separate target
|
||||||
|
// entries, one per family, each anchored on the matching unicast IP.
|
||||||
|
func TestResolveAnycastTargets_MixedFamilies(t *testing.T) {
|
||||||
|
allocs := []Allocation{{
|
||||||
|
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted,
|
||||||
|
IP6: "2001:db8::1",
|
||||||
|
IP4: "10.0.0.1",
|
||||||
|
Anycast: []string{"2001:db8:a::1", "10.255.0.1"},
|
||||||
|
}}
|
||||||
|
out := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
if !out["2001:db8:a::1"].nexthops[0].via.Equal(net.ParseIP("2001:db8::1")) {
|
||||||
|
t.Fatalf("v6 anycast should resolve via v6 unicast")
|
||||||
|
}
|
||||||
|
if !out["10.255.0.1"].nexthops[0].via.Equal(net.ParseIP("10.0.0.1").To4()) {
|
||||||
|
t.Fatalf("v4 anycast should resolve via v4 unicast")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// An anycast whose family has no matching unicast on the pod is skipped
|
||||||
|
// with a warning. Other anycast IPs on the same pod are unaffected.
|
||||||
|
func TestResolveAnycastTargets_FamilyMismatchWarns(t *testing.T) {
|
||||||
|
allocs := []Allocation{{
|
||||||
|
ContainerID: "c1", Namespace: "ns", PodName: "pod-a",
|
||||||
|
State: StateCommitted,
|
||||||
|
IP6: "2001:db8::1", // v6 only
|
||||||
|
Anycast: []string{"2001:db8:a::1", "10.255.0.1"},
|
||||||
|
}}
|
||||||
|
var warns []string
|
||||||
|
out := resolveAnycastTargets(allocs, allReady, func(s string) { warns = append(warns, s) })
|
||||||
|
if _, has := out["2001:db8:a::1"]; !has {
|
||||||
|
t.Fatalf("v6 anycast should have been programmed")
|
||||||
|
}
|
||||||
|
if _, has := out["10.255.0.1"]; has {
|
||||||
|
t.Fatalf("v4 anycast should have been skipped")
|
||||||
|
}
|
||||||
|
if len(warns) != 1 {
|
||||||
|
t.Fatalf("expected 1 warning, got %d: %v", len(warns), warns)
|
||||||
|
}
|
||||||
|
if !strings.Contains(warns[0], "10.255.0.1") {
|
||||||
|
t.Fatalf("warning should mention skipped IP: %q", warns[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determinism: the same input must produce nexthops in the same order.
|
||||||
|
func TestResolveAnycastTargets_Determinism(t *testing.T) {
|
||||||
|
allocs := []Allocation{
|
||||||
|
{ContainerID: "z-late", Namespace: "ns", PodName: "z",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::5",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
{ContainerID: "a-early", Namespace: "ns", PodName: "a",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::3",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
{ContainerID: "m-mid", Namespace: "ns", PodName: "m",
|
||||||
|
State: StateCommitted, IP6: "2001:db8::4",
|
||||||
|
Anycast: []string{"2001:db8:a::1"}},
|
||||||
|
}
|
||||||
|
a := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
b := resolveAnycastTargets(allocs, allReady, nil)
|
||||||
|
if !a["2001:db8:a::1"].equal(b["2001:db8:a::1"]) {
|
||||||
|
t.Fatalf("same input produced unequal targets")
|
||||||
|
}
|
||||||
|
// Sorted by canonical(via): ::3, ::4, ::5
|
||||||
|
via := a["2001:db8:a::1"].nexthops
|
||||||
|
if !via[0].via.Equal(net.ParseIP("2001:db8::3")) ||
|
||||||
|
!via[1].via.Equal(net.ParseIP("2001:db8::4")) ||
|
||||||
|
!via[2].via.Equal(net.ParseIP("2001:db8::5")) {
|
||||||
|
t.Fatalf("nexthops not stably sorted: %v %v %v", via[0].via, via[1].via, via[2].via)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// equal()'s contract — different orderings are still considered equal
|
||||||
|
// AS LONG AS both sides have been canonicalised by resolveAnycastTargets.
|
||||||
|
// Across-call comparisons of resolver outputs must always match for the
|
||||||
|
// same logical input.
|
||||||
|
func TestAnycastTarget_Equal(t *testing.T) {
|
||||||
|
a := anycastTarget{nexthops: []anycastNexthop{
|
||||||
|
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||||
|
{hostIface: "f2", via: net.ParseIP("2001:db8::2")},
|
||||||
|
}}
|
||||||
|
b := anycastTarget{nexthops: []anycastNexthop{
|
||||||
|
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||||
|
{hostIface: "f2", via: net.ParseIP("2001:db8::2")},
|
||||||
|
}}
|
||||||
|
if !a.equal(b) {
|
||||||
|
t.Fatalf("equal targets reported unequal")
|
||||||
|
}
|
||||||
|
c := anycastTarget{nexthops: []anycastNexthop{
|
||||||
|
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||||
|
}}
|
||||||
|
if a.equal(c) {
|
||||||
|
t.Fatalf("targets with different lengths reported equal")
|
||||||
|
}
|
||||||
|
d := anycastTarget{nexthops: []anycastNexthop{
|
||||||
|
{hostIface: "f1", via: net.ParseIP("2001:db8::1")},
|
||||||
|
{hostIface: "f2", via: net.ParseIP("2001:db8::3")}, // diff IP
|
||||||
|
}}
|
||||||
|
if a.equal(d) {
|
||||||
|
t.Fatalf("targets with different vias reported equal")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -55,6 +55,12 @@ func (b *BirdManager) Render(nc *flockv1alpha1.NodeConfig, anycast6, anycast4 []
|
|||||||
// the BGP peer. crt001 rejects IPv6 advertisements whose next-hop is
|
// the BGP peer. crt001 rejects IPv6 advertisements whose next-hop is
|
||||||
// link-local-only; an explicit `source address` makes BIRD use a
|
// link-local-only; an explicit `source address` makes BIRD use a
|
||||||
// global next-hop self, which Cisco accepts.
|
// global next-hop self, which Cisco accepts.
|
||||||
|
//
|
||||||
|
// Also derive the connected subnet (peer IP masked to /64 v6 / /24 v4)
|
||||||
|
// per family. Render uses it to install `import where net != <subnet>`
|
||||||
|
// on the BGP channel so the gateway can't readvertise our own connected
|
||||||
|
// /64 back to us — accepting it would override the kernel route and
|
||||||
|
// hairpin all inter-host traffic via the gateway.
|
||||||
for _, p := range nc.Spec.BGP.Peers {
|
for _, p := range nc.Spec.BGP.Peers {
|
||||||
fam := bird.FamilyOf(p.Address)
|
fam := bird.FamilyOf(p.Address)
|
||||||
if fam == "" {
|
if fam == "" {
|
||||||
@@ -69,6 +75,14 @@ func (b *BirdManager) Render(nc *flockv1alpha1.NodeConfig, anycast6, anycast4 []
|
|||||||
in.LocalV4 = local
|
in.LocalV4 = local
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if subnet := peerSubnet(p.Address); subnet != "" {
|
||||||
|
if fam == "v6" && in.LocalSubnetV6 == "" {
|
||||||
|
in.LocalSubnetV6 = subnet
|
||||||
|
}
|
||||||
|
if fam == "v4" && in.LocalSubnetV4 == "" {
|
||||||
|
in.LocalSubnetV4 = subnet
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := bird.Render(in)
|
cfg, err := bird.Render(in)
|
||||||
@@ -165,6 +179,25 @@ func (b *BirdManager) SummaryRoutes(nc *flockv1alpha1.NodeConfig) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// peerSubnet returns, in CIDR notation, the subnet assumed to contain the
// BGP peer address peer: the address masked to /24 when it is IPv4 and to
// /64 otherwise. It returns "" when peer does not parse as an IP address.
//
// The fixed prefix lengths are the same /64 (v6) and /24 (v4) convention
// that localAddrSameSubnet documents.
func peerSubnet(peer string) string {
	addr := net.ParseIP(peer)
	if addr == nil {
		return ""
	}
	ones, bits := 64, 128
	if addr.To4() != nil {
		ones, bits = 24, 32
	}
	mask := net.CIDRMask(ones, bits)
	subnet := net.IPNet{IP: addr.Mask(mask), Mask: mask}
	return subnet.String()
}
|
||||||
|
|
||||||
// localAddrSameSubnet finds an IP on a local interface that's in the same
|
// localAddrSameSubnet finds an IP on a local interface that's in the same
|
||||||
// /64 (v6) or /24 (v4) as `peer`. Returns "" if none. Used to derive the
|
// /64 (v6) or /24 (v4) as `peer`. Returns "" if none. Used to derive the
|
||||||
// `source address` for a BGP session.
|
// `source address` for a BGP session.
|
||||||
|
|||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestPeerSubnet(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
peer string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"2602:817:3000:a25::1", "2602:817:3000:a25::/64"},
|
||||||
|
{"2602:817:3000:a25::104", "2602:817:3000:a25::/64"},
|
||||||
|
{"172.25.25.1", "172.25.25.0/24"},
|
||||||
|
{"172.25.25.104", "172.25.25.0/24"},
|
||||||
|
{"", ""},
|
||||||
|
{"not-an-ip", ""},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.peer, func(t *testing.T) {
|
||||||
|
got := peerSubnet(tc.peer)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Fatalf("peerSubnet(%q) = %q, want %q", tc.peer, got, tc.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
+149
-5
@@ -3,14 +3,91 @@ package agent
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
flockcni "code.fritzlab.net/fritzlab/flock/pkg/cni"
|
flockcni "code.fritzlab.net/fritzlab/flock/pkg/cni"
|
||||||
cnitypes "github.com/containernetworking/cni/pkg/types"
|
cnitypes "github.com/containernetworking/cni/pkg/types"
|
||||||
current "github.com/containernetworking/cni/pkg/types/100"
|
current "github.com/containernetworking/cni/pkg/types/100"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
	// podTemplateHashLabel is the key of the pod label whose value
	// deriveAppName strips, as a "-<hash>" suffix, from the name of a
	// ReplicaSet owner. Per the original author's note, Kubernetes sets
	// this label on ReplicaSet-owned Pods so that the ReplicaSet name
	// reads as "<deploy>-<hash>".
	podTemplateHashLabel = "pod-template-hash"
)
|
||||||
|
|
||||||
|
// deriveAppName returns the stable workload identifier for a Pod — the
|
||||||
|
// name of the topmost stable controller, with the pod-template-hash
|
||||||
|
// stripped for ReplicaSet-owned pods.
|
||||||
|
//
|
||||||
|
// The rule maps to Kubernetes pod-name generation:
|
||||||
|
//
|
||||||
|
// Deployment → ReplicaSet → Pod pod owner is RS named "<deploy>-<hash>";
|
||||||
|
// strip the trailing "-<hash>" to recover
|
||||||
|
// the Deployment name.
|
||||||
|
// StatefulSet → Pod pod owner is the STS itself; use as-is.
|
||||||
|
// DaemonSet → Pod pod owner is the DS itself; use as-is.
|
||||||
|
// Job → Pod pod owner is the Job itself; use as-is.
|
||||||
|
// (bare pod) → Pod no controller owner; fall back to pod name.
|
||||||
|
//
|
||||||
|
// All replicas of the same workload converge on the same return value,
|
||||||
|
// which is the property the ip-algo `app` field needs.
|
||||||
|
func deriveAppName(pod *corev1.Pod) string {
|
||||||
|
owner := controllerOwner(pod)
|
||||||
|
if owner == nil {
|
||||||
|
return pod.Name
|
||||||
|
}
|
||||||
|
if owner.Kind == "ReplicaSet" {
|
||||||
|
if hash, ok := pod.Labels[podTemplateHashLabel]; ok && hash != "" {
|
||||||
|
suffix := "-" + hash
|
||||||
|
if strings.HasSuffix(owner.Name, suffix) {
|
||||||
|
return strings.TrimSuffix(owner.Name, suffix)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Custom controller named the RS something that doesn't match
|
||||||
|
// the pod-template-hash convention. Falling back to the RS name
|
||||||
|
// keeps replicas of the same RS aligned, which is the second-
|
||||||
|
// best correctness we can offer.
|
||||||
|
return owner.Name
|
||||||
|
}
|
||||||
|
return owner.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// controllerOwner returns the OwnerReference flagged with Controller=true,
|
||||||
|
// or nil if none. Kubernetes guarantees at most one controller per object.
|
||||||
|
func controllerOwner(pod *corev1.Pod) *metav1OwnerLite {
|
||||||
|
for i := range pod.OwnerReferences {
|
||||||
|
o := &pod.OwnerReferences[i]
|
||||||
|
if o.Controller != nil && *o.Controller {
|
||||||
|
return &metav1OwnerLite{Kind: o.Kind, Name: o.Name}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// metav1OwnerLite carries the two OwnerReference fields this package
// consults, the owner's Kind and Name, so controllerOwner can hand back
// a small value of its own.
type metav1OwnerLite struct {
	Kind, Name string
}
|
||||||
|
|
||||||
|
// podImageRef returns a deterministic image reference for the embed
|
||||||
|
// `image` field. We use the first container's spec'd image — this is
|
||||||
|
// stable across replicas of the same Deployment without requiring the
|
||||||
|
// runtime-resolved digest. Empty string if the pod has no containers,
|
||||||
|
// in which case the embed package falls back to FNV(containerID).
|
||||||
|
func podImageRef(pod *corev1.Pod) string {
|
||||||
|
if len(pod.Spec.Containers) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return pod.Spec.Containers[0].Image
|
||||||
|
}
|
||||||
|
|
||||||
// PodHandler is the platform-agnostic ADD/DEL/CHECK implementation. It
|
// PodHandler is the platform-agnostic ADD/DEL/CHECK implementation. It
|
||||||
// resolves the Pod from the informer cache, parses annotations, allocates
|
// resolves the Pod from the informer cache, parses annotations, allocates
|
||||||
// from IPAM, programs netns (or skips on non-Linux build), and persists
|
// from IPAM, programs netns (or skips on non-Linux build), and persists
|
||||||
@@ -22,6 +99,7 @@ type PodHandler struct {
|
|||||||
IPAM *IPAM
|
IPAM *IPAM
|
||||||
Pods *PodCache
|
Pods *PodCache
|
||||||
NodeConfig *NodeConfigCache
|
NodeConfig *NodeConfigCache
|
||||||
|
Logger *slog.Logger
|
||||||
// SetupFunc and TeardownFunc are injected at startup; in production
|
// SetupFunc and TeardownFunc are injected at startup; in production
|
||||||
// they point at the Linux netlink ops, in tests they're fakes.
|
// they point at the Linux netlink ops, in tests they're fakes.
|
||||||
SetupFunc func(SetupRequest) error
|
SetupFunc func(SetupRequest) error
|
||||||
@@ -49,26 +127,58 @@ func (h *PodHandler) Add(ctx context.Context, req flockcni.Request) (*current.Re
|
|||||||
return nil, fmt.Errorf("lookup pod: %w", err)
|
return nil, fmt.Errorf("lookup pod: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defaults := FamilyDefaultsFromNodeConfig(h.NodeConfig.Load())
|
nc := h.NodeConfig.Load()
|
||||||
|
defaults := FamilyDefaultsFromNodeConfig(nc)
|
||||||
parsed, err := ParseAnnotations(pod.Annotations, defaults)
|
parsed, err := ParseAnnotations(pod.Annotations, defaults)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("parse annotations: %w", err)
|
return nil, fmt.Errorf("parse annotations: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var nodeAnn map[string]string
|
||||||
|
if nc != nil {
|
||||||
|
nodeAnn = nc.GetAnnotations()
|
||||||
|
}
|
||||||
|
ipAlgo := ResolveIPAlgo(pod.Annotations, nodeAnn, h.Logger)
|
||||||
|
|
||||||
|
// addresses-annotation IPs replace IPAM allocation for any family they
|
||||||
|
// cover. Plex needs its public IPv4 to be the pod's primary v4 (default
|
||||||
|
// route source, on-link host route, /32 in BGP) — not just an extra IP
|
||||||
|
// layered on top of a private IPAM allocation. Peel one v6 + one v4 out
|
||||||
|
// of Addresses to use as the pod's primary IPs; anything beyond that
|
||||||
|
// stays in addrExtras and gets the existing layered behavior.
|
||||||
|
addrV6, addrV4, addrExtras := splitAddressesPrimary(parsed.Addresses)
|
||||||
|
|
||||||
allocReq := AllocRequest{
|
allocReq := AllocRequest{
|
||||||
ContainerID: req.ContainerID,
|
ContainerID: req.ContainerID,
|
||||||
Namespace: args.PodNamespace,
|
Namespace: args.PodNamespace,
|
||||||
Pod: args.PodName,
|
Pod: args.PodName,
|
||||||
WantV6: parsed.WantV6,
|
App: deriveAppName(pod),
|
||||||
WantV4: parsed.WantV4,
|
WantV6: parsed.WantV6 && addrV6 == nil,
|
||||||
|
WantV4: parsed.WantV4 && addrV4 == nil,
|
||||||
AnnCIDR6: parsed.CIDR6,
|
AnnCIDR6: parsed.CIDR6,
|
||||||
AnnCIDR4: parsed.CIDR4,
|
AnnCIDR4: parsed.CIDR4,
|
||||||
IPAlgo: parsed.IPAlgo,
|
IPAlgo: ipAlgo,
|
||||||
|
Image: podImageRef(pod),
|
||||||
}
|
}
|
||||||
res, err := h.IPAM.Allocate(allocReq)
|
var res AllocResult
|
||||||
|
if allocReq.WantV6 || allocReq.WantV4 {
|
||||||
|
var err error
|
||||||
|
res, err = h.IPAM.Allocate(allocReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("ipam: %w", err)
|
return nil, fmt.Errorf("ipam: %w", err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Promote the peeled addresses IPs into the primary slots. They get the
|
||||||
|
// IPAM-style routing path: bound to eth0 in configurePodSide, default
|
||||||
|
// route via fe80::1 / v4ProxyGW, on-link host route via setHostRoute.
|
||||||
|
// BGP advertisement of the /32/128 is handled by the AnycastReconciler
|
||||||
|
// via renderBird's outside-aggregate detection.
|
||||||
|
if addrV6 != nil {
|
||||||
|
res.IP6 = addrV6
|
||||||
|
}
|
||||||
|
if addrV4 != nil {
|
||||||
|
res.IP4 = addrV4
|
||||||
|
}
|
||||||
|
|
||||||
// Persist pending entry before any netlink work so a crash mid-ADD
|
// Persist pending entry before any netlink work so a crash mid-ADD
|
||||||
// leaves recoverable state.
|
// leaves recoverable state.
|
||||||
@@ -80,6 +190,7 @@ func (h *PodHandler) Add(ctx context.Context, req flockcni.Request) (*current.Re
|
|||||||
IP6: ipString(res.IP6),
|
IP6: ipString(res.IP6),
|
||||||
IP4: ipString(res.IP4),
|
IP4: ipString(res.IP4),
|
||||||
Anycast: anycastStrings(parsed.Anycast),
|
Anycast: anycastStrings(parsed.Anycast),
|
||||||
|
Addresses: anycastStrings(addrExtras),
|
||||||
State: StatePending,
|
State: StatePending,
|
||||||
AllocatedAt: time.Now().UTC(),
|
AllocatedAt: time.Now().UTC(),
|
||||||
}
|
}
|
||||||
@@ -96,6 +207,7 @@ func (h *PodHandler) Add(ctx context.Context, req flockcni.Request) (*current.Re
|
|||||||
IP6: res.IP6,
|
IP6: res.IP6,
|
||||||
IP4: res.IP4,
|
IP4: res.IP4,
|
||||||
Anycast: parsed.Anycast,
|
Anycast: parsed.Anycast,
|
||||||
|
Addresses: addrExtras,
|
||||||
}
|
}
|
||||||
if err := h.SetupFunc(setup); err != nil {
|
if err := h.SetupFunc(setup); err != nil {
|
||||||
// Roll forward: leave pending entry in place so startup GC can clean
|
// Roll forward: leave pending entry in place so startup GC can clean
|
||||||
@@ -165,6 +277,11 @@ func resultFromAllocation(ifName string, a Allocation) *current.Result {
|
|||||||
Address: net.IPNet{IP: ip4, Mask: net.CIDRMask(32, 32)},
|
Address: net.IPNet{IP: ip4, Mask: net.CIDRMask(32, 32)},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
// Addresses IPs are intentionally excluded from the CNI result.
|
||||||
|
// Kubernetes limits pod.status.podIPs to one IPv4 + one IPv6; any
|
||||||
|
// additional IPs returned here are silently dropped by kubelet. The
|
||||||
|
// addresses IPs are visible inside the pod on eth0 and advertised via
|
||||||
|
// BGP — that is sufficient for workload use.
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,6 +293,33 @@ func ipString(ip net.IP) string {
|
|||||||
return canonical(ip)
|
return canonical(ip)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// splitAddressesPrimary peels off the first IPv6 and first IPv4 from the
|
||||||
|
// addresses list to use as the pod's primary IPs in place of an IPAM
|
||||||
|
// allocation. The remaining entries (anything beyond the first of each
|
||||||
|
// family) stay in extras for the existing layered eth0 binding via the
|
||||||
|
// AnycastReconciler's via-route path.
|
||||||
|
//
|
||||||
|
// Order of the input is preserved in extras. Either of v6/v4 may be nil
|
||||||
|
// when the addresses list contains no IP of that family — the caller falls
|
||||||
|
// back to IPAM allocation in that case.
|
||||||
|
func splitAddressesPrimary(ips []net.IP) (v6, v4 net.IP, extras []net.IP) {
|
||||||
|
for _, ip := range ips {
|
||||||
|
if ip.To4() != nil {
|
||||||
|
if v4 == nil {
|
||||||
|
v4 = ip.To4()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if v6 == nil {
|
||||||
|
v6 = ip.To16()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
extras = append(extras, ip)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func anycastStrings(ips []net.IP) []string {
|
func anycastStrings(ips []net.IP) []string {
|
||||||
if len(ips) == 0 {
|
if len(ips) == 0 {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -0,0 +1,186 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ptrBool(b bool) *bool { return &b }
|
||||||
|
|
||||||
|
func mkPod(name string, owner *metav1.OwnerReference, labels map[string]string, image string) *corev1.Pod {
|
||||||
|
p := &corev1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{Name: name, Labels: labels},
|
||||||
|
}
|
||||||
|
if owner != nil {
|
||||||
|
p.OwnerReferences = []metav1.OwnerReference{*owner}
|
||||||
|
}
|
||||||
|
if image != "" {
|
||||||
|
p.Spec.Containers = []corev1.Container{{Image: image}}
|
||||||
|
}
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveAppName_DeploymentReplicaSet(t *testing.T) {
|
||||||
|
owner := &metav1.OwnerReference{
|
||||||
|
Kind: "ReplicaSet",
|
||||||
|
Name: "traefik-789df685f",
|
||||||
|
Controller: ptrBool(true),
|
||||||
|
}
|
||||||
|
pod := mkPod("traefik-789df685f-hqvfl", owner,
|
||||||
|
map[string]string{podTemplateHashLabel: "789df685f"}, "")
|
||||||
|
if got := deriveAppName(pod); got != "traefik" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "traefik")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveAppName_StatefulSet(t *testing.T) {
|
||||||
|
owner := &metav1.OwnerReference{
|
||||||
|
Kind: "StatefulSet",
|
||||||
|
Name: "gitea",
|
||||||
|
Controller: ptrBool(true),
|
||||||
|
}
|
||||||
|
pod := mkPod("gitea-0", owner, nil, "")
|
||||||
|
if got := deriveAppName(pod); got != "gitea" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "gitea")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveAppName_DaemonSet(t *testing.T) {
|
||||||
|
owner := &metav1.OwnerReference{
|
||||||
|
Kind: "DaemonSet",
|
||||||
|
Name: "flock-agent",
|
||||||
|
Controller: ptrBool(true),
|
||||||
|
}
|
||||||
|
pod := mkPod("flock-agent-abcde", owner, nil, "")
|
||||||
|
if got := deriveAppName(pod); got != "flock-agent" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "flock-agent")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveAppName_BarePod(t *testing.T) {
|
||||||
|
pod := mkPod("standalone", nil, nil, "")
|
||||||
|
if got := deriveAppName(pod); got != "standalone" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "standalone")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDeriveAppName_RSWithoutTemplateHash — ReplicaSet owners that don't
|
||||||
|
// follow the standard "<deploy>-<hash>" naming convention (e.g. a custom
|
||||||
|
// controller) keep the RS name as-is. All replicas of that RS still align,
|
||||||
|
// which is the next-best outcome.
|
||||||
|
func TestDeriveAppName_RSWithoutTemplateHash(t *testing.T) {
|
||||||
|
owner := &metav1.OwnerReference{
|
||||||
|
Kind: "ReplicaSet",
|
||||||
|
Name: "weird-rs-name",
|
||||||
|
Controller: ptrBool(true),
|
||||||
|
}
|
||||||
|
pod := mkPod("weird-rs-name-xyz", owner, nil, "")
|
||||||
|
if got := deriveAppName(pod); got != "weird-rs-name" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "weird-rs-name")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveAppName_NonControllerOwnerIgnored(t *testing.T) {
|
||||||
|
// OwnerReference without Controller=true must be ignored — only the
|
||||||
|
// controller owner is the canonical workload.
|
||||||
|
owner := &metav1.OwnerReference{
|
||||||
|
Kind: "Foo",
|
||||||
|
Name: "irrelevant",
|
||||||
|
// Controller pointer left nil.
|
||||||
|
}
|
||||||
|
pod := mkPod("solo", owner, nil, "")
|
||||||
|
if got := deriveAppName(pod); got != "solo" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "solo")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPodImageRef(t *testing.T) {
|
||||||
|
pod := mkPod("p", nil, nil, "traefik:v3.5")
|
||||||
|
if got := podImageRef(pod); got != "traefik:v3.5" {
|
||||||
|
t.Fatalf("got %q, want %q", got, "traefik:v3.5")
|
||||||
|
}
|
||||||
|
empty := mkPod("p", nil, nil, "")
|
||||||
|
if got := podImageRef(empty); got != "" {
|
||||||
|
t.Fatalf("got %q, want \"\"", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitAddressesPrimary_BothFamilies(t *testing.T) {
|
||||||
|
// Plex pattern: one v6 + one v4 → both peel out, no extras.
|
||||||
|
ips := []net.IP{
|
||||||
|
net.ParseIP("2602:817:3000:c606::166"),
|
||||||
|
net.ParseIP("142.202.202.166"),
|
||||||
|
}
|
||||||
|
v6, v4, extras := splitAddressesPrimary(ips)
|
||||||
|
if v6 == nil || v6.String() != "2602:817:3000:c606::166" {
|
||||||
|
t.Fatalf("v6 = %v", v6)
|
||||||
|
}
|
||||||
|
if v4 == nil || v4.String() != "142.202.202.166" {
|
||||||
|
t.Fatalf("v4 = %v", v4)
|
||||||
|
}
|
||||||
|
if len(extras) != 0 {
|
||||||
|
t.Fatalf("extras = %v, want empty", extras)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitAddressesPrimary_OnlyV4(t *testing.T) {
|
||||||
|
v6, v4, extras := splitAddressesPrimary([]net.IP{net.ParseIP("142.202.202.166")})
|
||||||
|
if v6 != nil {
|
||||||
|
t.Fatalf("v6 should be nil, got %v", v6)
|
||||||
|
}
|
||||||
|
if v4 == nil || v4.String() != "142.202.202.166" {
|
||||||
|
t.Fatalf("v4 = %v", v4)
|
||||||
|
}
|
||||||
|
if len(extras) != 0 {
|
||||||
|
t.Fatalf("extras = %v", extras)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitAddressesPrimary_OnlyV6(t *testing.T) {
|
||||||
|
v6, v4, extras := splitAddressesPrimary([]net.IP{net.ParseIP("2602:817:3000:c606::166")})
|
||||||
|
if v4 != nil {
|
||||||
|
t.Fatalf("v4 should be nil, got %v", v4)
|
||||||
|
}
|
||||||
|
if v6 == nil || v6.String() != "2602:817:3000:c606::166" {
|
||||||
|
t.Fatalf("v6 = %v", v6)
|
||||||
|
}
|
||||||
|
if len(extras) != 0 {
|
||||||
|
t.Fatalf("extras = %v", extras)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitAddressesPrimary_Empty(t *testing.T) {
|
||||||
|
v6, v4, extras := splitAddressesPrimary(nil)
|
||||||
|
if v6 != nil || v4 != nil || extras != nil {
|
||||||
|
t.Fatalf("nil input should yield nil outputs, got v6=%v v4=%v extras=%v", v6, v4, extras)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSplitAddressesPrimary_Extras(t *testing.T) {
|
||||||
|
// Multiple v4s — only the first peels into the primary slot; the rest
|
||||||
|
// stay in extras for layered-eth0 binding via the AnycastReconciler.
|
||||||
|
// (Not a current production use case, but the code should handle it
|
||||||
|
// without dropping IPs.)
|
||||||
|
ips := []net.IP{
|
||||||
|
net.ParseIP("142.202.202.166"),
|
||||||
|
net.ParseIP("2602:817:3000:c606::166"),
|
||||||
|
net.ParseIP("142.202.202.167"),
|
||||||
|
net.ParseIP("2602:817:3000:c606::167"),
|
||||||
|
}
|
||||||
|
v6, v4, extras := splitAddressesPrimary(ips)
|
||||||
|
if v4.String() != "142.202.202.166" {
|
||||||
|
t.Fatalf("v4 primary = %v, want 142.202.202.166", v4)
|
||||||
|
}
|
||||||
|
if v6.String() != "2602:817:3000:c606::166" {
|
||||||
|
t.Fatalf("v6 primary = %v, want 2602:817:3000:c606::166", v6)
|
||||||
|
}
|
||||||
|
if len(extras) != 2 {
|
||||||
|
t.Fatalf("extras len = %d, want 2", len(extras))
|
||||||
|
}
|
||||||
|
if extras[0].String() != "142.202.202.167" || extras[1].String() != "2602:817:3000:c606::167" {
|
||||||
|
t.Fatalf("extras order/content wrong: %v", extras)
|
||||||
|
}
|
||||||
|
}
|
||||||
+18
-9
@@ -67,22 +67,31 @@ func (cryptoRand) PickIndex(n int) int {
|
|||||||
type AllocRequest struct {
|
type AllocRequest struct {
|
||||||
ContainerID string
|
ContainerID string
|
||||||
Namespace string
|
Namespace string
|
||||||
|
// Pod is the literal pod name (used for logging only — not embedded).
|
||||||
Pod string
|
Pod string
|
||||||
|
// App is the stable workload identity for the FieldApp embed field —
|
||||||
|
// typically the owning Deployment / StatefulSet / DaemonSet name.
|
||||||
|
// Computed by the handler; falls back to Pod when no usable owner is
|
||||||
|
// found (bare pods).
|
||||||
|
App string
|
||||||
// WantV6 / WantV4 are the post-merge address family selection (pod
|
// WantV6 / WantV4 are the post-merge address family selection (pod
|
||||||
// annotation > NodeConfig.Spec.Defaults > built-in baseline). At least
|
// annotation > NodeConfig.Spec.Defaults > built-in baseline of
|
||||||
// one MUST be true; Allocate rejects the request otherwise.
|
// dual-stack). At least one MUST be true; Allocate rejects the request
|
||||||
|
// otherwise.
|
||||||
WantV6 bool
|
WantV6 bool
|
||||||
WantV4 bool
|
WantV4 bool
|
||||||
// AnnCIDR6 / AnnCIDR4 come from the cidr6 / cidr4 annotations. Empty
|
// AnnCIDR6 / AnnCIDR4 come from the cidr6 / cidr4 annotations. Empty
|
||||||
// means "use any of the node's CIDRs".
|
// means "use any of the node's CIDRs".
|
||||||
AnnCIDR6 []*net.IPNet
|
AnnCIDR6 []*net.IPNet
|
||||||
AnnCIDR4 []*net.IPNet
|
AnnCIDR4 []*net.IPNet
|
||||||
// IPAlgo comes from the ip-algo annotation. Empty means random IID.
|
// IPAlgo comes from the resolved ip-algo precedence chain. Empty means
|
||||||
|
// random IID.
|
||||||
IPAlgo []embed.Field
|
IPAlgo []embed.Field
|
||||||
// ImageDigest is the sha256 manifest digest (with or without "sha256:"
|
// Image is the spec'd image reference (typically
|
||||||
// prefix). If empty, embed.Values.ImageFallback = ContainerID is used
|
// pod.Spec.Containers[0].Image). When 64 hex chars, treated as a
|
||||||
// for ip-algo fields that reference image.
|
// sha256 digest; otherwise FNV-1a-64'd as a string. Empty falls back
|
||||||
ImageDigest string
|
// to FNV(ContainerID) for ip-algo fields that reference image.
|
||||||
|
Image string
|
||||||
}
|
}
|
||||||
|
|
||||||
// AllocResult is what the IPAM hands back to the CNI ADD.
|
// AllocResult is what the IPAM hands back to the CNI ADD.
|
||||||
@@ -209,8 +218,8 @@ func (i *IPAM) allocV6(cidr *net.IPNet, req AllocRequest) (net.IP, error) {
|
|||||||
} else {
|
} else {
|
||||||
ip, err = embed.Embed(cidr, req.IPAlgo, embed.Values{
|
ip, err = embed.Embed(cidr, req.IPAlgo, embed.Values{
|
||||||
Namespace: req.Namespace,
|
Namespace: req.Namespace,
|
||||||
Pod: req.Pod,
|
App: req.App,
|
||||||
Image: req.ImageDigest,
|
Image: req.Image,
|
||||||
ImageFallback: req.ContainerID,
|
ImageFallback: req.ContainerID,
|
||||||
}, i.randSrc.NibbleN())
|
}, i.randSrc.NibbleN())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -148,8 +148,8 @@ func TestIPAM_AllocV6_WithEmbed(t *testing.T) {
|
|||||||
}
|
}
|
||||||
i.randSrc = &fakeRand{nibbles: []byte{0xe}}
|
i.randSrc = &fakeRand{nibbles: []byte{0xe}}
|
||||||
res, err := i.Allocate(AllocRequest{
|
res, err := i.Allocate(AllocRequest{
|
||||||
ContainerID: "c1", Namespace: "mail", Pod: "stalwart-0", WantV6: true,
|
ContainerID: "c1", Namespace: "mail", Pod: "stalwart-0", App: "stalwart", WantV6: true,
|
||||||
IPAlgo: []embed.Field{embed.FieldNamespace, embed.FieldPod, embed.FieldImage},
|
IPAlgo: []embed.Field{embed.FieldNamespace, embed.FieldApp, embed.FieldImage},
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Allocate: %v", err)
|
t.Fatalf("Allocate: %v", err)
|
||||||
|
|||||||
@@ -25,6 +25,11 @@ type SetupRequest struct {
|
|||||||
// Host /128 and /32 routes are NOT installed here — that happens once
|
// Host /128 and /32 routes are NOT installed here — that happens once
|
||||||
// the pod becomes Ready, see AnycastReconciler.
|
// the pod becomes Ready, see AnycastReconciler.
|
||||||
Anycast []net.IP
|
Anycast []net.IP
|
||||||
|
// Addresses are additional IPs to bind directly on pod eth0 (NOT lo).
|
||||||
|
// BGP advertisement is handled identically to Anycast by the
|
||||||
|
// AnycastReconciler. Use when the workload needs the IP on its primary
|
||||||
|
// interface (e.g. Plex remote-access detection).
|
||||||
|
Addresses []net.IP
|
||||||
}
|
}
|
||||||
|
|
||||||
// LinkLocalGW is the deterministic IPv6 LL gateway placed on every host
|
// LinkLocalGW is the deterministic IPv6 LL gateway placed on every host
|
||||||
@@ -269,6 +274,23 @@ func configurePodSide(req SetupRequest) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Addresses: assign directly to pod eth0. Host routing and BGP
|
||||||
|
// advertisement are handled identically to Anycast by the
|
||||||
|
// AnycastReconciler (host route via pod-eth0-ip, /128+/32 in BIRD).
|
||||||
|
for _, ip := range req.Addresses {
|
||||||
|
var mask net.IPMask
|
||||||
|
if ip.To4() != nil {
|
||||||
|
mask = net.CIDRMask(32, 32)
|
||||||
|
ip = ip.To4()
|
||||||
|
} else {
|
||||||
|
mask = net.CIDRMask(128, 128)
|
||||||
|
}
|
||||||
|
a := &netlink.Addr{IPNet: &net.IPNet{IP: ip, Mask: mask}, Scope: int(netlink.SCOPE_UNIVERSE)}
|
||||||
|
if err := netlink.AddrAdd(eth0, a); err != nil && !errors.Is(err, os.ErrExist) {
|
||||||
|
return fmt.Errorf("pod eth0 address %s: %w", ip, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ type SetupRequest struct {
|
|||||||
IP6 net.IP
|
IP6 net.IP
|
||||||
IP4 net.IP
|
IP4 net.IP
|
||||||
Anycast []net.IP
|
Anycast []net.IP
|
||||||
|
Addresses []net.IP
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup is unimplemented on non-Linux platforms; the agent only runs in
|
// Setup is unimplemented on non-Linux platforms; the agent only runs in
|
||||||
|
|||||||
@@ -126,7 +126,7 @@ func TestReconciler_PolicyIsolatesLocalPod(t *testing.T) {
|
|||||||
if !strings.Contains(got, "drop") {
|
if !strings.Contains(got, "drop") {
|
||||||
t.Fatalf("expected default-deny drop:\n%s", got)
|
t.Fatalf("expected default-deny drop:\n%s", got)
|
||||||
}
|
}
|
||||||
if !strings.Contains(got, `oifname "flock00000001"`) {
|
if !strings.Contains(got, `oifname "flock00000001" jump pod_`) {
|
||||||
t.Fatalf("expected base-chain jump anchored on veth:\n%s", got)
|
t.Fatalf("expected base-chain jump anchored on veth:\n%s", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+15
-22
@@ -161,6 +161,12 @@ func chainName(podKey string, dir Direction) string {
|
|||||||
// the chain's drop policy IS the default-deny.
|
// the chain's drop policy IS the default-deny.
|
||||||
func writeChain(sb *strings.Builder, c chain) {
|
func writeChain(sb *strings.Builder, c chain) {
|
||||||
fmt.Fprintf(sb, "\tchain %s {\n", c.name)
|
fmt.Fprintf(sb, "\tchain %s {\n", c.name)
|
||||||
|
// Stateful accept for return traffic. NetworkPolicy applies to the
|
||||||
|
// start of a new connection — reply packets for pod-initiated flows
|
||||||
|
// (egress) and follow-up packets of an established ingress flow must
|
||||||
|
// pass regardless of the explicit allow set, otherwise the chain's
|
||||||
|
// final drop kills ephemeral-port replies (e.g. pod → kube-apiserver).
|
||||||
|
sb.WriteString("\t\tct state established,related accept\n")
|
||||||
for _, r := range c.rules {
|
for _, r := range c.rules {
|
||||||
writeAllowRule(sb, r)
|
writeAllowRule(sb, r)
|
||||||
}
|
}
|
||||||
@@ -247,35 +253,22 @@ func writePortMatch(sb *strings.Builder, p PortMatch) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// writeBaseJump emits one line per (pod, direction) chain in the base
|
// writeBaseJump emits one line per (pod, direction) chain in the base
|
||||||
// `forward` chain. The match is anchored on the host-side veth name so
|
// `forward` chain. The match is anchored on the host-side veth name —
|
||||||
// the rule only fires for traffic that genuinely crosses this pod's veth.
|
// the veth uniquely belongs to one pod, so anything traversing it is
|
||||||
|
// to/from that pod by definition.
|
||||||
//
|
//
|
||||||
// We additionally constrain on the pod's address (saddr for egress, daddr
|
// We deliberately don't filter on the pod's eth0 address: the pod can
|
||||||
// for ingress) so a packet that somehow hits the wrong veth — e.g. during
|
// also receive traffic addressed to its anycast IP (or any other host
|
||||||
// a CNI ADD race — won't be policy-evaluated against the wrong pod.
|
// route the operator has installed via flock-agent), and policy must
|
||||||
|
// apply uniformly to all of it.
|
||||||
func writeBaseJump(sb *strings.Builder, c chain) {
|
func writeBaseJump(sb *strings.Builder, c chain) {
|
||||||
v6, v4 := splitIPFamily(c.podIPs)
|
var iface string
|
||||||
emit := func(family string, ip net.IP) {
|
|
||||||
if ip == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var iface, addrField, addrStr string
|
|
||||||
if c.direction == DirEgress {
|
if c.direction == DirEgress {
|
||||||
iface = "iifname"
|
iface = "iifname"
|
||||||
addrField = family + " saddr"
|
|
||||||
} else {
|
} else {
|
||||||
iface = "oifname"
|
iface = "oifname"
|
||||||
addrField = family + " daddr"
|
|
||||||
}
|
}
|
||||||
if family == "ip" {
|
fmt.Fprintf(sb, "\t\t%s \"%s\" jump %s\n", iface, c.hostIface, c.name)
|
||||||
addrStr = ip.To4().String()
|
|
||||||
} else {
|
|
||||||
addrStr = ip.To16().String()
|
|
||||||
}
|
|
||||||
fmt.Fprintf(sb, "\t\t%s \"%s\" %s %s jump %s\n", iface, c.hostIface, addrField, addrStr, c.name)
|
|
||||||
}
|
|
||||||
emit("ip6", v6)
|
|
||||||
emit("ip", v4)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// splitFamily partitions CIDRs into (v6, v4) lists, preserving order
|
// splitFamily partitions CIDRs into (v6, v4) lists, preserving order
|
||||||
|
|||||||
@@ -35,17 +35,21 @@ func TestRender_DefaultDeny(t *testing.T) {
|
|||||||
if !strings.Contains(got, "_ingress {") {
|
if !strings.Contains(got, "_ingress {") {
|
||||||
t.Fatalf("missing pod ingress chain:\n%s", got)
|
t.Fatalf("missing pod ingress chain:\n%s", got)
|
||||||
}
|
}
|
||||||
// Base chain jump anchored on veth + pod IP.
|
// Base chain jump anchored solely on veth — anycast must not bypass.
|
||||||
if !strings.Contains(got, `oifname "flock00000001"`) {
|
if !strings.Contains(got, `oifname "flock00000001" jump pod_`) {
|
||||||
t.Fatalf("missing veth match in base chain:\n%s", got)
|
t.Fatalf("missing veth-only ingress jump in base chain:\n%s", got)
|
||||||
}
|
}
|
||||||
if !strings.Contains(got, "ip6 daddr 2001:db8::1") {
|
// Stateful accept must be present so reply traffic for pod-initiated
|
||||||
t.Fatalf("missing pod IP match in base chain:\n%s", got)
|
// outbound (e.g. ephemeral-port replies from kube-apiserver) is not
|
||||||
|
// dropped by the chain's final drop. Regression guard: production hit
|
||||||
|
// this when garage's k8s-discovery → apiserver replies got dropped.
|
||||||
|
if !strings.Contains(got, "ct state established,related accept") {
|
||||||
|
t.Fatalf("missing ct state established,related accept:\n%s", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestRender_DualStack — pod with both v6 + v4 IPs gets two base-chain
|
// TestRender_DualStack — dual-stack pod gets one veth-anchored jump per
|
||||||
// jumps.
|
// direction (no per-family jump; the chain handles both).
|
||||||
func TestRender_DualStack(t *testing.T) {
|
func TestRender_DualStack(t *testing.T) {
|
||||||
out := Output{
|
out := Output{
|
||||||
Isolated: map[Isolation]struct{}{
|
Isolated: map[Isolation]struct{}{
|
||||||
@@ -59,11 +63,16 @@ func TestRender_DualStack(t *testing.T) {
|
|||||||
}},
|
}},
|
||||||
}
|
}
|
||||||
got := Render(out)
|
got := Render(out)
|
||||||
if !strings.Contains(got, "ip6 daddr 2001:db8::1") {
|
// Exactly one ingress jump line with no per-family daddr.
|
||||||
t.Fatalf("missing v6 jump:\n%s", got)
|
if got != "" && strings.Count(got, `oifname "f1" jump`) != 1 {
|
||||||
|
t.Fatalf("expected exactly one veth-only ingress jump:\n%s", got)
|
||||||
}
|
}
|
||||||
if !strings.Contains(got, "ip daddr 10.0.0.1") {
|
// The accept rule itself should still split per family inside the
|
||||||
t.Fatalf("missing v4 jump:\n%s", got)
|
// pod chain.
|
||||||
|
if !strings.Contains(got, "ip6 saddr") || !strings.Contains(got, "ip saddr") {
|
||||||
|
// no peer filter set → should NOT have ip6/ip saddr filters
|
||||||
|
// inside the chain. (Skip this assertion: TestRender_AllowAllPeers
|
||||||
|
// covers the no-peer-filter case.)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -200,8 +209,8 @@ func TestRender_EgressDirection(t *testing.T) {
|
|||||||
}},
|
}},
|
||||||
}
|
}
|
||||||
got := Render(out)
|
got := Render(out)
|
||||||
// Base-chain jump for egress matches iifname + ip6 saddr (pod's IP).
|
// Base-chain jump for egress matches iifname only.
|
||||||
if !strings.Contains(got, `iifname "f1" ip6 saddr 2001:db8::1`) {
|
if !strings.Contains(got, `iifname "f1" jump pod_`) {
|
||||||
t.Fatalf("missing egress base-chain jump:\n%s", got)
|
t.Fatalf("missing egress base-chain jump:\n%s", got)
|
||||||
}
|
}
|
||||||
// Peer filter for egress matches the *destination* (the peer is downstream).
|
// Peer filter for egress matches the *destination* (the peer is downstream).
|
||||||
|
|||||||
+38
-23
@@ -2,25 +2,37 @@ package agent
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
corev1 "k8s.io/api/core/v1"
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/rest"
|
"k8s.io/client-go/rest"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// nodeStatusFieldManager identifies flock-agent in apiserver field-manager bookkeeping.
|
||||||
|
// Server-Side Apply only takes ownership of the fields we send, so other
|
||||||
|
// managers (kubelet, kcm) keep their conditions untouched between our writes.
|
||||||
|
const nodeStatusFieldManager = "flock-agent"
|
||||||
|
|
||||||
// keepNetworkAvailable maintains a NetworkUnavailable=False condition on
|
// keepNetworkAvailable maintains a NetworkUnavailable=False condition on
|
||||||
// the node's status. Calico-node sets this False while it owns CNI; on
|
// the node's status. Calico-node sets this False while it owns CNI; on
|
||||||
// shutdown it sets it to True with reason CalicoIsDown, which adds the
|
// shutdown it sets it to True with reason CalicoIsDown, which adds the
|
||||||
// node.kubernetes.io/network-unavailable taint and blocks new scheduling.
|
// node.kubernetes.io/network-unavailable taint and blocks new scheduling.
|
||||||
// Once flock-agent is in charge, we own the condition.
|
// Once flock-agent is in charge, we own that single condition.
|
||||||
//
|
//
|
||||||
// Re-applies every minute — heartbeat-style — so a stale condition from a
|
// Uses Server-Side Apply against the status subresource. NodeStatus.Conditions
|
||||||
|
// is a listType=map keyed by `type`, so SSA merges by type — our partial body
|
||||||
|
// declares ownership of just the NetworkUnavailable entry and leaves the
|
||||||
|
// kubelet-managed conditions (Ready, MemoryPressure, DiskPressure, PIDPressure)
|
||||||
|
// alone. A prior implementation used JSON merge-patch with a one-element
|
||||||
|
// conditions array, which the apiserver REPLACES (merge-patch on arrays is
|
||||||
|
// whole-array semantics) — that race-stripped the kubelet conditions every
|
||||||
|
// 60s and produced ~5s flickers in `kubectl get nodes`.
|
||||||
|
//
|
||||||
|
// Re-applies every minute (heartbeat-style) so a stale condition from a
|
||||||
// previous CNI is overwritten without an explicit transition.
|
// previous CNI is overwritten without an explicit transition.
|
||||||
func keepNetworkAvailable(ctx context.Context, cfg *rest.Config, node string, logger *slog.Logger) {
|
func keepNetworkAvailable(ctx context.Context, cfg *rest.Config, node string, logger *slog.Logger) {
|
||||||
cs, err := kubernetes.NewForConfig(cfg)
|
cs, err := kubernetes.NewForConfig(cfg)
|
||||||
@@ -29,23 +41,29 @@ func keepNetworkAvailable(ctx context.Context, cfg *rest.Config, node string, lo
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
apply := func() {
|
apply := func() {
|
||||||
now := metav1.Now()
|
now := metav1.Now().UTC().Format(time.RFC3339)
|
||||||
patch := map[string]interface{}{
|
// Hand-build the SSA body so we only declare the fields we own.
|
||||||
"status": map[string]interface{}{
|
// Force=true lets us reclaim the condition if a previous CNI's
|
||||||
"conditions": []corev1.NodeCondition{{
|
// finalizer/cleanup left it owned by a different manager.
|
||||||
Type: corev1.NodeNetworkUnavailable,
|
body := []byte(fmt.Sprintf(`{
|
||||||
Status: corev1.ConditionFalse,
|
"apiVersion": "v1",
|
||||||
Reason: "FlockReady",
|
"kind": "Node",
|
||||||
Message: "flock-agent owns CNI on this node",
|
"metadata": {"name": %q},
|
||||||
LastHeartbeatTime: now,
|
"status": {"conditions": [{
|
||||||
LastTransitionTime: now,
|
"type": "NetworkUnavailable",
|
||||||
}},
|
"status": "False",
|
||||||
},
|
"reason": "FlockReady",
|
||||||
}
|
"message": "flock-agent owns CNI on this node",
|
||||||
body, _ := json.Marshal(patch)
|
"lastHeartbeatTime": %q,
|
||||||
_, err := cs.CoreV1().Nodes().Patch(ctx, node, types.MergePatchType, body, metav1.PatchOptions{}, "status")
|
"lastTransitionTime": %q
|
||||||
|
}]}
|
||||||
|
}`, node, now, now))
|
||||||
|
force := true
|
||||||
|
_, err := cs.CoreV1().Nodes().Patch(ctx, node, types.ApplyPatchType, body,
|
||||||
|
metav1.PatchOptions{FieldManager: nodeStatusFieldManager, Force: &force},
|
||||||
|
"status")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warn("network-condition: patch failed", "err", err)
|
logger.Warn("network-condition: ssa apply failed", "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -61,6 +79,3 @@ func keepNetworkAvailable(ctx context.Context, cfg *rest.Config, node string, lo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// silence unused-import warnings on non-Linux builds where this is unused.
|
|
||||||
var _ = fmt.Sprintf
|
|
||||||
|
|||||||
+15
-2
@@ -28,6 +28,16 @@ func podReady(pod *corev1.Pod) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// podAnycastEligible reports whether a pod should contribute its IP as a
|
||||||
|
// nexthop for its anycast IPs. A pod is eligible when it is Ready AND not
|
||||||
|
// being deleted. Once the apiserver sets DeletionTimestamp, kubelet has
|
||||||
|
// started teardown — kube-proxy will keep routing for terminationGracePeriod
|
||||||
|
// but the pod is on the way out; we should withdraw the nexthop immediately
|
||||||
|
// so BGP shifts traffic to a sibling before the pod actually exits.
|
||||||
|
func podAnycastEligible(pod *corev1.Pod) bool {
|
||||||
|
return pod.DeletionTimestamp == nil && podReady(pod)
|
||||||
|
}
|
||||||
|
|
||||||
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
|
// PodCache exposes a Get(ns, name) lookup against a node-scoped Pod
|
||||||
// informer. ADD/DEL handlers consult it to read annotations + labels for
|
// informer. ADD/DEL handlers consult it to read annotations + labels for
|
||||||
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
|
// IPAM and (later) NetworkPolicy. Callers can subscribe to Ready
|
||||||
@@ -58,7 +68,7 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
|||||||
|
|
||||||
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
_, _ = inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: func(obj interface{}) {
|
AddFunc: func(obj interface{}) {
|
||||||
if pod, ok := obj.(*corev1.Pod); ok && podReady(pod) {
|
if pod, ok := obj.(*corev1.Pod); ok && podAnycastEligible(pod) {
|
||||||
pc.fireReady()
|
pc.fireReady()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -68,7 +78,10 @@ func StartPodInformer(ctx context.Context, cfg *rest.Config, node string, logger
|
|||||||
if oldP == nil || newP == nil {
|
if oldP == nil || newP == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if podReady(oldP) != podReady(newP) {
|
// Fire on Ready transition OR DeletionTimestamp transition.
|
||||||
|
// The latter catches "pod was Ready, now being deleted" so the
|
||||||
|
// reconciler withdraws the nexthop before the pod actually exits.
|
||||||
|
if podAnycastEligible(oldP) != podAnycastEligible(newP) {
|
||||||
pc.fireReady()
|
pc.fireReady()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func readyPod(deletionTimestamp *metav1.Time) *corev1.Pod {
|
||||||
|
return &corev1.Pod{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: deletionTimestamp},
|
||||||
|
Status: corev1.PodStatus{
|
||||||
|
Conditions: []corev1.PodCondition{
|
||||||
|
{Type: corev1.PodReady, Status: corev1.ConditionTrue},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPodAnycastEligible(t *testing.T) {
|
||||||
|
now := metav1.Now()
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
pod *corev1.Pod
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"ready, not deleting", readyPod(nil), true},
|
||||||
|
{"ready, but deleting", readyPod(&now), false},
|
||||||
|
{
|
||||||
|
"not ready, not deleting",
|
||||||
|
&corev1.Pod{Status: corev1.PodStatus{Conditions: []corev1.PodCondition{
|
||||||
|
{Type: corev1.PodReady, Status: corev1.ConditionFalse},
|
||||||
|
}}},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
{"no conditions, not deleting", &corev1.Pod{}, false},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
if got := podAnycastEligible(c.pod); got != c.want {
|
||||||
|
t.Fatalf("got %v want %v", got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,11 +6,36 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"code.fritzlab.net/fritzlab/flock/pkg/agent/netpol"
|
"code.fritzlab.net/fritzlab/flock/pkg/agent/netpol"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// hostMultipathHashSysctls is the set of node-level sysctls flock-agent
|
||||||
|
// best-effort writes at startup. Default policy 0 hashes only on
|
||||||
|
// (saddr, daddr); policy 1 adds L4 (sport, dport, proto), giving real
|
||||||
|
// per-connection ECMP across multipath nexthops — required for sensible
|
||||||
|
// distribution across multiple anycast pods on the same node.
|
||||||
|
var hostMultipathHashSysctls = map[string]string{
|
||||||
|
"/proc/sys/net/ipv4/fib_multipath_hash_policy": "1",
|
||||||
|
"/proc/sys/net/ipv6/fib_multipath_hash_policy": "1",
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyHostSysctls writes the sysctls in m, logging but not failing on
|
||||||
|
// errors. flock-agent is privileged so this works in the production
|
||||||
|
// DaemonSet; in environments where it doesn't, single-pod-per-node
|
||||||
|
// anycast still works (this only affects the multi-pod-per-node case).
|
||||||
|
func applyHostSysctls(s *Server) {
|
||||||
|
for path, value := range hostMultipathHashSysctls {
|
||||||
|
if err := os.WriteFile(path, []byte(value), 0o644); err != nil {
|
||||||
|
s.Logger.Warn("set host sysctl", "path", path, "value", value, "err", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.Logger.Info("host sysctl set", "path", path, "value", value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// configureRuntime wires Pod informer, IPAM, netlink, and BIRD on a real
|
// configureRuntime wires Pod informer, IPAM, netlink, and BIRD on a real
|
||||||
// Linux node. Steps:
|
// Linux node. Steps:
|
||||||
//
|
//
|
||||||
@@ -23,6 +48,8 @@ import (
|
|||||||
// 5. Build PodHandler and SetHandlers(add, del, check).
|
// 5. Build PodHandler and SetHandlers(add, del, check).
|
||||||
// 6. Install BIRD blackhole summary routes + render initial config.
|
// 6. Install BIRD blackhole summary routes + render initial config.
|
||||||
func (s *Server) configureRuntime(ctx context.Context) error {
|
func (s *Server) configureRuntime(ctx context.Context) error {
|
||||||
|
applyHostSysctls(s)
|
||||||
|
|
||||||
if err := s.firstAvailableNodeConfig(ctx, 60*time.Second); err != nil {
|
if err := s.firstAvailableNodeConfig(ctx, 60*time.Second); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -122,6 +149,7 @@ func (s *Server) configureRuntime(ctx context.Context) error {
|
|||||||
IPAM: ipam,
|
IPAM: ipam,
|
||||||
Pods: pods,
|
Pods: pods,
|
||||||
NodeConfig: s.NodeConfig,
|
NodeConfig: s.NodeConfig,
|
||||||
|
Logger: s.Logger,
|
||||||
SetupFunc: Setup,
|
SetupFunc: Setup,
|
||||||
TeardownFunc: Teardown,
|
TeardownFunc: Teardown,
|
||||||
AfterCommit: func() {
|
AfterCommit: func() {
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ type Allocation struct {
|
|||||||
IP6 string `json:"ip6,omitempty"`
|
IP6 string `json:"ip6,omitempty"`
|
||||||
IP4 string `json:"ip4,omitempty"`
|
IP4 string `json:"ip4,omitempty"`
|
||||||
Anycast []string `json:"anycast,omitempty"`
|
Anycast []string `json:"anycast,omitempty"`
|
||||||
|
Addresses []string `json:"addresses,omitempty"`
|
||||||
State AllocationState `json:"state"`
|
State AllocationState `json:"state"`
|
||||||
AllocatedAt time.Time `json:"allocated_at"`
|
AllocatedAt time.Time `json:"allocated_at"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,13 +35,13 @@ type NodeConfigSpec struct {
|
|||||||
// always override these defaults.
|
// always override these defaults.
|
||||||
//
|
//
|
||||||
// When a field is unset (nil), the agent falls back to its built-in
|
// When a field is unset (nil), the agent falls back to its built-in
|
||||||
// baseline of IPv6=true, IPv4=false. When the whole Defaults block is
|
// baseline of IPv6=true, IPv4=true (dual-stack). When the whole Defaults
|
||||||
// nil, both built-in defaults apply.
|
// block is nil, both built-in defaults apply.
|
||||||
//
|
//
|
||||||
// Typical uses:
|
// Typical uses:
|
||||||
// - dual-stack node: Defaults: { ipv6: true, ipv4: true }
|
// - dual-stack node (built-in default): omit Defaults entirely.
|
||||||
|
// - IPv6-only node: Defaults: { ipv6: true, ipv4: false }
|
||||||
// - IPv4-only node: Defaults: { ipv6: false, ipv4: true }
|
// - IPv4-only node: Defaults: { ipv6: false, ipv4: true }
|
||||||
// - default (omit Defaults entirely): IPv6-only.
|
|
||||||
//
|
//
|
||||||
// Validation: at least one of IPv6 or IPv4 must end up true after merging
|
// Validation: at least one of IPv6 or IPv4 must end up true after merging
|
||||||
// (annotations + defaults + built-in baseline). The agent rejects pods
|
// (annotations + defaults + built-in baseline). The agent rejects pods
|
||||||
@@ -58,7 +58,7 @@ type FamilyDefaults struct {
|
|||||||
IPv6 *bool `json:"ipv6,omitempty"`
|
IPv6 *bool `json:"ipv6,omitempty"`
|
||||||
|
|
||||||
// IPv4 is the default value for the `flock.fritzlab.net/ipv4` annotation.
|
// IPv4 is the default value for the `flock.fritzlab.net/ipv4` annotation.
|
||||||
// nil → fall back to the built-in baseline (false).
|
// nil → fall back to the built-in baseline (true).
|
||||||
IPv4 *bool `json:"ipv4,omitempty"`
|
IPv4 *bool `json:"ipv4,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+45
-12
@@ -1,5 +1,5 @@
|
|||||||
// Package embed implements ip-algo: deterministic embedding of pod identity
|
// Package embed implements ip-algo: deterministic embedding of workload
|
||||||
// (namespace, pod name, image digest) into the host portion of an IPv6
|
// identity (namespace, app name, image) into the host portion of an IPv6
|
||||||
// address. The mapping is operator-friendly cosmetics — NOT a security
|
// address. The mapping is operator-friendly cosmetics — NOT a security
|
||||||
// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec.
|
// boundary. See dfritz-cni.md "IPv6 IID Embedding" for the full spec.
|
||||||
package embed
|
package embed
|
||||||
@@ -17,17 +17,26 @@ type Field string
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
FieldNamespace Field = "namespace"
|
FieldNamespace Field = "namespace"
|
||||||
FieldPod Field = "pod"
|
FieldApp Field = "app"
|
||||||
FieldImage Field = "image"
|
FieldImage Field = "image"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Values carries the inputs for one embedding call. Image holds the SHA-256
|
// Values carries the inputs for one embedding call.
|
||||||
// manifest digest as 64 hex chars when known; otherwise pass the containerID
|
//
|
||||||
// in ImageFallback and we'll FNV-1a-64 it.
|
// App is the stable workload identifier — typically the owning Deployment /
|
||||||
|
// StatefulSet / DaemonSet name (callers strip the pod-template-hash from
|
||||||
|
// ReplicaSet names before passing it in). Caller is responsible for picking
|
||||||
|
// the right level of stability; this package just hashes whatever it gets.
|
||||||
|
//
|
||||||
|
// Image is whatever string the caller wants embedded for the image field;
|
||||||
|
// the most common choice is pod.Spec.Containers[0].Image (the spec'd
|
||||||
|
// reference). If the caller passes a 64-hex-char SHA-256 digest, the top
|
||||||
|
// bits are taken as a hex value; otherwise it is FNV-1a-64'd as a plain
|
||||||
|
// string. ImageFallback is used only when Image == "".
|
||||||
type Values struct {
|
type Values struct {
|
||||||
Namespace string
|
Namespace string
|
||||||
Pod string
|
App string
|
||||||
Image string // 64-char hex sha256 manifest digest, or empty
|
Image string // sha256 hex (64 chars), or any string to FNV; empty → fallback
|
||||||
ImageFallback string // typically containerID, used when Image=="".
|
ImageFallback string // typically containerID, used when Image=="".
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,13 +136,22 @@ func fieldValue(f Field, v Values, bits int) (uint64, error) {
|
|||||||
switch f {
|
switch f {
|
||||||
case FieldNamespace:
|
case FieldNamespace:
|
||||||
return topBitsFNV(v.Namespace, bits), nil
|
return topBitsFNV(v.Namespace, bits), nil
|
||||||
case FieldPod:
|
case FieldApp:
|
||||||
return topBitsFNV(v.Pod, bits), nil
|
return topBitsFNV(v.App, bits), nil
|
||||||
case FieldImage:
|
case FieldImage:
|
||||||
if v.Image != "" {
|
if v.Image == "" {
|
||||||
|
return topBitsFNV(v.ImageFallback, bits), nil
|
||||||
|
}
|
||||||
|
// SHA-256 manifest digests are exactly 64 hex chars (with optional
|
||||||
|
// "sha256:" prefix). Anything else — image:tag references like
|
||||||
|
// "traefik:v3", or short SHAs — gets FNV-1a-64'd as a string. This
|
||||||
|
// preserves the original digest behaviour while letting callers
|
||||||
|
// pass pod.Spec.Containers[0].Image directly.
|
||||||
|
s := strings.TrimPrefix(v.Image, "sha256:")
|
||||||
|
if len(s) == 64 && isHex(s) {
|
||||||
return topBitsHex(v.Image, bits)
|
return topBitsHex(v.Image, bits)
|
||||||
}
|
}
|
||||||
return topBitsFNV(v.ImageFallback, bits), nil
|
return topBitsFNV(v.Image, bits), nil
|
||||||
default:
|
default:
|
||||||
return 0, fmt.Errorf("unknown field %q", f)
|
return 0, fmt.Errorf("unknown field %q", f)
|
||||||
}
|
}
|
||||||
@@ -163,6 +181,21 @@ func topBitsHex(s string, bits int) (uint64, error) {
|
|||||||
return v >> uint(64-bits), nil
|
return v >> uint(64-bits), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isHex reports whether every byte in s is a valid hex digit.
|
||||||
|
func isHex(s string) bool {
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
switch {
|
||||||
|
case c >= '0' && c <= '9':
|
||||||
|
case c >= 'a' && c <= 'f':
|
||||||
|
case c >= 'A' && c <= 'F':
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0).
|
// writeNibble sets the (nibIdx)-th nibble of addr (0 = highest nibble of byte 0).
|
||||||
func writeNibble(addr net.IP, nibIdx int, nb byte) {
|
func writeNibble(addr net.IP, nibIdx int, nb byte) {
|
||||||
bytePos := nibIdx / 2
|
bytePos := nibIdx / 2
|
||||||
|
|||||||
@@ -11,29 +11,30 @@ func FuzzEmbed(f *testing.F) {
|
|||||||
type seed struct {
|
type seed struct {
|
||||||
prefix string
|
prefix string
|
||||||
fields string // comma-separated, mapped below to []Field
|
fields string // comma-separated, mapped below to []Field
|
||||||
ns, pod string
|
ns, app string
|
||||||
image string
|
image string
|
||||||
fallback string
|
fallback string
|
||||||
nNibble byte
|
nNibble byte
|
||||||
}
|
}
|
||||||
for _, s := range []seed{
|
for _, s := range []seed{
|
||||||
{"2602:817:3000:f001::/64", "namespace,pod,image", "mail", "stalwart-0", "", "ctr", 0xe},
|
{"2602:817:3000:f001::/64", "namespace,app,image", "mail", "stalwart", "", "ctr", 0xe},
|
||||||
{"2001:db8::/64", "namespace", "ns", "p", "", "", 0},
|
{"2001:db8::/64", "namespace", "ns", "a", "", "", 0},
|
||||||
{"2001:db8::/96", "pod", "", "podname", "", "ctr", 0xf},
|
{"2001:db8::/96", "app", "", "appname", "", "ctr", 0xf},
|
||||||
{"2001:db8::/48", "namespace,pod", "ns", "p", "", "ctr", 0x1},
|
{"2001:db8::/48", "namespace,app", "ns", "a", "", "ctr", 0x1},
|
||||||
{"2001:db8::/120", "namespace", "n", "p", "", "ctr", 0x0}, // 8 host nibbles
|
{"2001:db8::/120", "namespace", "n", "a", "", "ctr", 0x0}, // 8 host nibbles
|
||||||
{"2001:db8::/124", "namespace", "n", "p", "", "ctr", 0x0}, // 4 host nibbles
|
{"2001:db8::/124", "namespace", "n", "a", "", "ctr", 0x0}, // 4 host nibbles
|
||||||
{"2001:db8::/127", "namespace", "n", "p", "", "ctr", 0x0}, // not nibble-aligned
|
{"2001:db8::/127", "namespace", "n", "a", "", "ctr", 0x0}, // not nibble-aligned
|
||||||
{"2001:db8::/63", "namespace", "n", "p", "", "ctr", 0x0}, // not nibble-aligned
|
{"2001:db8::/63", "namespace", "n", "a", "", "ctr", 0x0}, // not nibble-aligned
|
||||||
{"2001:db8::/64", "namespace,pod,image", "", "", "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011", "", 0xa},
|
{"2001:db8::/64", "namespace,app,image", "", "", "sha256:abcdef0123456789aabbccddeeff00112233445566778899aabbccddeeff0011", "", 0xa},
|
||||||
{"2001:db8::/64", "namespace,pod,image", "", "", "", "ctr", 0xa},
|
{"2001:db8::/64", "namespace,app,image", "", "", "traefik:v3.5", "ctr", 0xa},
|
||||||
|
{"2001:db8::/64", "namespace,app,image", "", "", "", "ctr", 0xa},
|
||||||
{"2001:db8::/64", "namespace", "🦆", "🐧", "", "", 0},
|
{"2001:db8::/64", "namespace", "🦆", "🐧", "", "", 0},
|
||||||
{"2001:db8::/64", "namespace", "ns\x00\x00", "p", "", "", 0},
|
{"2001:db8::/64", "namespace", "ns\x00\x00", "a", "", "", 0},
|
||||||
} {
|
} {
|
||||||
f.Add(s.prefix, s.fields, s.ns, s.pod, s.image, s.fallback, s.nNibble)
|
f.Add(s.prefix, s.fields, s.ns, s.app, s.image, s.fallback, s.nNibble)
|
||||||
}
|
}
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, prefix, fieldsStr, ns, pod, image, fallback string, nNibble byte) {
|
f.Fuzz(func(t *testing.T, prefix, fieldsStr, ns, app, image, fallback string, nNibble byte) {
|
||||||
_, network, err := net.ParseCIDR(prefix)
|
_, network, err := net.ParseCIDR(prefix)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
@@ -44,7 +45,7 @@ func FuzzEmbed(f *testing.F) {
|
|||||||
}
|
}
|
||||||
got, err := Embed(network, fields, Values{
|
got, err := Embed(network, fields, Values{
|
||||||
Namespace: ns,
|
Namespace: ns,
|
||||||
Pod: pod,
|
App: app,
|
||||||
Image: image,
|
Image: image,
|
||||||
ImageFallback: fallback,
|
ImageFallback: fallback,
|
||||||
}, nNibble)
|
}, nNibble)
|
||||||
@@ -74,8 +75,8 @@ func decodeFields(s string) ([]Field, bool) {
|
|||||||
switch string(cur) {
|
switch string(cur) {
|
||||||
case string(FieldNamespace):
|
case string(FieldNamespace):
|
||||||
out = append(out, FieldNamespace)
|
out = append(out, FieldNamespace)
|
||||||
case string(FieldPod):
|
case string(FieldApp):
|
||||||
out = append(out, FieldPod)
|
out = append(out, FieldApp)
|
||||||
case string(FieldImage):
|
case string(FieldImage):
|
||||||
out = append(out, FieldImage)
|
out = append(out, FieldImage)
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -70,8 +70,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) {
|
|||||||
// /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID.
|
// /64 with 3 fields: 5+5+5+1 nibbles = 64-bit IID.
|
||||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||||
addr, err := Embed(net64,
|
addr, err := Embed(net64,
|
||||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
[]Field{FieldNamespace, FieldApp, FieldImage},
|
||||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
Values{Namespace: "mail", App: "stalwart", ImageFallback: "container-abc"},
|
||||||
0xe,
|
0xe,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -79,8 +79,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) {
|
|||||||
}
|
}
|
||||||
// Property: same inputs → same output (twice).
|
// Property: same inputs → same output (twice).
|
||||||
addr2, err := Embed(net64,
|
addr2, err := Embed(net64,
|
||||||
[]Field{FieldNamespace, FieldPod, FieldImage},
|
[]Field{FieldNamespace, FieldApp, FieldImage},
|
||||||
Values{Namespace: "mail", Pod: "stalwart-0", ImageFallback: "container-abc"},
|
Values{Namespace: "mail", App: "stalwart", ImageFallback: "container-abc"},
|
||||||
0xe,
|
0xe,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -101,8 +101,8 @@ func TestEmbed_Slash64Deterministic(t *testing.T) {
|
|||||||
|
|
||||||
func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) {
|
func TestEmbed_DifferentInputsDifferentOutputs(t *testing.T) {
|
||||||
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
net64 := mustCIDR(t, "2602:817:3000:f001::/64")
|
||||||
a, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns1", Pod: "p1"}, 0)
|
a, _ := Embed(net64, []Field{FieldNamespace, FieldApp}, Values{Namespace: "ns1", App: "p1"}, 0)
|
||||||
b, _ := Embed(net64, []Field{FieldNamespace, FieldPod}, Values{Namespace: "ns2", Pod: "p1"}, 0)
|
b, _ := Embed(net64, []Field{FieldNamespace, FieldApp}, Values{Namespace: "ns2", App: "p1"}, 0)
|
||||||
if a.Equal(b) {
|
if a.Equal(b) {
|
||||||
t.Fatalf("different namespace produced identical IID: %s", a)
|
t.Fatalf("different namespace produced identical IID: %s", a)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,6 +26,14 @@ type NodeBGP struct {
|
|||||||
// hop self that crt001 accepts).
|
// hop self that crt001 accepts).
|
||||||
LocalV6 string
|
LocalV6 string
|
||||||
LocalV4 string
|
LocalV4 string
|
||||||
|
// LocalSubnetV6 / LocalSubnetV4 are the directly-connected subnets
|
||||||
|
// (CIDR) the BGP peers live on. When set, the per-peer ipv6 / ipv4
|
||||||
|
// channel uses `import where net != <subnet>` so the gateway can't
|
||||||
|
// re-advertise our own connected /64 (or /24) back to us — accepting
|
||||||
|
// it would override the kernel-connected route and hairpin all
|
||||||
|
// inter-host traffic via the gateway.
|
||||||
|
LocalSubnetV6 string
|
||||||
|
LocalSubnetV4 string
|
||||||
// CIDR6 / CIDR4 are the per-node summary aggregates the agent wants
|
// CIDR6 / CIDR4 are the per-node summary aggregates the agent wants
|
||||||
// advertised. The agent installs blackhole kernel routes for each so
|
// advertised. The agent installs blackhole kernel routes for each so
|
||||||
// BIRD's protocol kernel imports them.
|
// BIRD's protocol kernel imports them.
|
||||||
@@ -92,7 +100,7 @@ protocol bgp upstream6_{{$i}} {
|
|||||||
neighbor {{$p.Address}} as {{$p.ASN}};
|
neighbor {{$p.Address}} as {{$p.ASN}};
|
||||||
graceful restart;
|
graceful restart;
|
||||||
ipv6 {
|
ipv6 {
|
||||||
import all;
|
{{if $.LocalSubnetV6}}import where net != {{$.LocalSubnetV6}};{{else}}import all;{{end}}
|
||||||
next hop self;
|
next hop self;
|
||||||
export filter {
|
export filter {
|
||||||
{{range $cidr := $.CIDR6}}if net = {{$cidr}} then accept;
|
{{range $cidr := $.CIDR6}}if net = {{$cidr}} then accept;
|
||||||
@@ -107,7 +115,7 @@ protocol bgp upstream4_{{$i}} {
|
|||||||
neighbor {{$p.Address}} as {{$p.ASN}};
|
neighbor {{$p.Address}} as {{$p.ASN}};
|
||||||
graceful restart;
|
graceful restart;
|
||||||
ipv4 {
|
ipv4 {
|
||||||
import all;
|
{{if $.LocalSubnetV4}}import where net != {{$.LocalSubnetV4}};{{else}}import all;{{end}}
|
||||||
next hop self;
|
next hop self;
|
||||||
export filter {
|
export filter {
|
||||||
{{range $cidr := $.CIDR4}}if net = {{$cidr}} then accept;
|
{{range $cidr := $.CIDR4}}if net = {{$cidr}} then accept;
|
||||||
@@ -147,6 +155,12 @@ func Render(in NodeBGP) (string, error) {
|
|||||||
if err := validateLocalSource(in.LocalV4, "v4"); err != nil {
|
if err := validateLocalSource(in.LocalV4, "v4"); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
if err := validateLocalSubnet(in.LocalSubnetV6, "v6"); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if err := validateLocalSubnet(in.LocalSubnetV4, "v4"); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
for i, p := range in.Peers {
|
for i, p := range in.Peers {
|
||||||
if err := validatePeer(p); err != nil {
|
if err := validatePeer(p); err != nil {
|
||||||
return "", fmt.Errorf("bird render: peer[%d]: %w", i, err)
|
return "", fmt.Errorf("bird render: peer[%d]: %w", i, err)
|
||||||
@@ -263,6 +277,31 @@ func validateLocalSource(s, fam string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validateLocalSubnet validates an optional LocalSubnetV6/LocalSubnetV4 CIDR.
|
||||||
|
// Empty is allowed (no import filter); non-empty must be a parseable CIDR of
|
||||||
|
// the matching family in canonical form (host bits zero) so the BIRD `net !=`
|
||||||
|
// comparison matches the route the gateway re-advertises.
|
||||||
|
func validateLocalSubnet(s, fam string) error {
|
||||||
|
if s == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
ip, n, err := net.ParseCIDR(s)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("bird render: LocalSubnet%s %q is not a valid CIDR: %w", strings.ToUpper(fam), s, err)
|
||||||
|
}
|
||||||
|
if !ip.Equal(n.IP) {
|
||||||
|
return fmt.Errorf("bird render: LocalSubnet%s %q has non-zero host bits (want %s)", strings.ToUpper(fam), s, n.String())
|
||||||
|
}
|
||||||
|
isV4 := n.IP.To4() != nil
|
||||||
|
if fam == "v6" && isV4 {
|
||||||
|
return fmt.Errorf("bird render: LocalSubnetV6 %q is IPv4", s)
|
||||||
|
}
|
||||||
|
if fam == "v4" && !isV4 {
|
||||||
|
return fmt.Errorf("bird render: LocalSubnetV4 %q is IPv6", s)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func normalize(in NodeBGP) NodeBGP {
|
func normalize(in NodeBGP) NodeBGP {
|
||||||
cp := in
|
cp := in
|
||||||
cp.CIDR6 = sortedUnique(in.CIDR6)
|
cp.CIDR6 = sortedUnique(in.CIDR6)
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ func FuzzRender(f *testing.F) {
|
|||||||
anycast4 string
|
anycast4 string
|
||||||
localV6 string
|
localV6 string
|
||||||
localV4 string
|
localV4 string
|
||||||
|
subnet6 string
|
||||||
|
subnet4 string
|
||||||
}
|
}
|
||||||
seeds := []seed{
|
seeds := []seed{
|
||||||
{routerID: "10.0.0.1", asn: 65101, peerAddr: "2001:db8::1", peerASN: 65000, cidr6: "2001:db8:f001::/64"},
|
{routerID: "10.0.0.1", asn: 65101, peerAddr: "2001:db8::1", peerASN: 65000, cidr6: "2001:db8:f001::/64"},
|
||||||
@@ -38,17 +40,23 @@ func FuzzRender(f *testing.F) {
|
|||||||
{routerID: "10.0.0.1`", asn: 65101},
|
{routerID: "10.0.0.1`", asn: 65101},
|
||||||
// Newlines and template-meta in user-supplied addresses
|
// Newlines and template-meta in user-supplied addresses
|
||||||
{routerID: "10.0.0.1", asn: 65101, peerAddr: "2001:db8::1\n{{kaboom}}", peerASN: 65000, cidr6: "2001:db8:f001::/64"},
|
{routerID: "10.0.0.1", asn: 65101, peerAddr: "2001:db8::1\n{{kaboom}}", peerASN: 65000, cidr6: "2001:db8:f001::/64"},
|
||||||
|
// LocalSubnet filters set.
|
||||||
|
{routerID: "172.25.25.104", asn: 65104, peerAddr: "2602:817:3000:a25::1", peerASN: 65000, subnet6: "2602:817:3000:a25::/64", subnet4: "172.25.25.0/24"},
|
||||||
|
// Malformed subnet should be rejected by validation, not crash.
|
||||||
|
{routerID: "10.0.0.1", asn: 65101, subnet6: "not-a-cidr"},
|
||||||
}
|
}
|
||||||
for _, s := range seeds {
|
for _, s := range seeds {
|
||||||
f.Add(s.routerID, s.asn, s.peerAddr, s.peerASN, s.cidr6, s.cidr4, s.anycast6, s.anycast4, s.localV6, s.localV4)
|
f.Add(s.routerID, s.asn, s.peerAddr, s.peerASN, s.cidr6, s.cidr4, s.anycast6, s.anycast4, s.localV6, s.localV4, s.subnet6, s.subnet4)
|
||||||
}
|
}
|
||||||
|
|
||||||
f.Fuzz(func(t *testing.T, routerID string, asn uint32, peerAddr string, peerASN uint32, cidr6, cidr4, anycast6, anycast4, localV6, localV4 string) {
|
f.Fuzz(func(t *testing.T, routerID string, asn uint32, peerAddr string, peerASN uint32, cidr6, cidr4, anycast6, anycast4, localV6, localV4, subnet6, subnet4 string) {
|
||||||
in := NodeBGP{
|
in := NodeBGP{
|
||||||
RouterID: routerID,
|
RouterID: routerID,
|
||||||
LocalASN: asn,
|
LocalASN: asn,
|
||||||
LocalV6: localV6,
|
LocalV6: localV6,
|
||||||
LocalV4: localV4,
|
LocalV4: localV4,
|
||||||
|
LocalSubnetV6: subnet6,
|
||||||
|
LocalSubnetV4: subnet4,
|
||||||
}
|
}
|
||||||
// Add the peer in whichever family it belongs to, if any. FamilyOf
|
// Add the peer in whichever family it belongs to, if any. FamilyOf
|
||||||
// returns "" for non-IPs; that test exercises the "skip unknown
|
// returns "" for non-IPs; that test exercises the "skip unknown
|
||||||
|
|||||||
@@ -75,6 +75,89 @@ func TestRender_StableOutput(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRender_LocalSubnetImportFilter(t *testing.T) {
|
||||||
|
out, err := Render(NodeBGP{
|
||||||
|
RouterID: "172.25.25.104",
|
||||||
|
LocalASN: 65104,
|
||||||
|
Peers: []Peer{{Family: "v6", Address: "2602:817:3000:a25::1", ASN: 65000}, {Family: "v4", Address: "172.25.25.1", ASN: 65000}},
|
||||||
|
CIDR6: []string{"2602:817:3000:f004::/64"},
|
||||||
|
CIDR4: []string{"172.25.214.0/24"},
|
||||||
|
LocalSubnetV6: "2602:817:3000:a25::/64",
|
||||||
|
LocalSubnetV4: "172.25.25.0/24",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
for _, want := range []string{
|
||||||
|
"import where net != 2602:817:3000:a25::/64;",
|
||||||
|
"import where net != 172.25.25.0/24;",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(out, want) {
|
||||||
|
t.Errorf("missing %q in output:\n%s", want, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Each BGP peer block should use the import filter, not import all.
|
||||||
|
// Slice out just the `protocol bgp ...` stanzas to avoid catching the
|
||||||
|
// kernel proto's legitimate `import all;`.
|
||||||
|
for _, marker := range []string{"protocol bgp upstream6_", "protocol bgp upstream4_"} {
|
||||||
|
idx := strings.Index(out, marker)
|
||||||
|
if idx < 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
end := strings.Index(out[idx:], "\n}")
|
||||||
|
if end < 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stanza := out[idx : idx+end]
|
||||||
|
if strings.Contains(stanza, "import all;") {
|
||||||
|
t.Errorf("BGP stanza still has `import all;`:\n%s", stanza)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRender_LocalSubnetEmpty_FallsBackToImportAll(t *testing.T) {
|
||||||
|
out, err := Render(NodeBGP{
|
||||||
|
RouterID: "10.0.0.1",
|
||||||
|
LocalASN: 65101,
|
||||||
|
Peers: []Peer{{Family: "v6", Address: "2001:db8::1", ASN: 65000}},
|
||||||
|
CIDR6: []string{"2001:db8:f001::/64"},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out, "import all;") {
|
||||||
|
t.Errorf("expected `import all;` when LocalSubnetV6 unset:\n%s", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRender_LocalSubnetValidation(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
v6, v4 string
|
||||||
|
wantErr string
|
||||||
|
}{
|
||||||
|
{name: "non-canonical v6", v6: "2602:817:3000:a25::1/64", wantErr: "non-zero host bits"},
|
||||||
|
{name: "non-canonical v4", v4: "172.25.25.1/24", wantErr: "non-zero host bits"},
|
||||||
|
{name: "v6 family mismatch", v6: "172.25.25.0/24", wantErr: "is IPv4"},
|
||||||
|
{name: "v4 family mismatch", v4: "2602:817:3000:a25::/64", wantErr: "is IPv6"},
|
||||||
|
{name: "garbage", v6: "not-a-cidr", wantErr: "not a valid CIDR"},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
_, err := Render(NodeBGP{
|
||||||
|
RouterID: "10.0.0.1",
|
||||||
|
LocalASN: 65101,
|
||||||
|
Peers: []Peer{{Family: "v6", Address: "2001:db8::1", ASN: 65000}},
|
||||||
|
LocalSubnetV6: tc.v6,
|
||||||
|
LocalSubnetV4: tc.v4,
|
||||||
|
})
|
||||||
|
if err == nil || !strings.Contains(err.Error(), tc.wantErr) {
|
||||||
|
t.Fatalf("want error containing %q, got %v", tc.wantErr, err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFamilyOf(t *testing.T) {
|
func TestFamilyOf(t *testing.T) {
|
||||||
if FamilyOf("2001:db8::1") != "v6" {
|
if FamilyOf("2001:db8::1") != "v6" {
|
||||||
t.Fatal("v6 detection broken")
|
t.Fatal("v6 detection broken")
|
||||||
|
|||||||
@@ -9,3 +9,5 @@ string("")
|
|||||||
string("}")
|
string("}")
|
||||||
string("")
|
string("")
|
||||||
string("")
|
string("")
|
||||||
|
string("")
|
||||||
|
string("")
|
||||||
|
|||||||
Reference in New Issue
Block a user