Files
Donavan Fritz d431fbddb4 site-publish: honor site.yaml excludes during S3 sync
site.yaml can now declare excludes: [paths/patterns] that are passed to
`aws s3 sync` and `aws s3 cp` as --exclude flags, so the listed objects
are neither uploaded from the build dir nor deleted from the bucket.
Escape hatch for assets managed out-of-band (e.g. large PDFs uploaded
via aws-cli) that would otherwise be wiped by --delete.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 10:12:10 -05:00

179 lines
6.4 KiB
Python

"""Deploy phase — S3 sync, manifest rendering, alias reconcile."""
import json
import os
import shlex
import shutil
import tempfile
from pathlib import Path
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from utils import (
DEFAULT_S3_ENDPOINT,
GITEA_HOST,
NAMESPACE,
clone_apps,
commit_and_push,
die,
env,
k8s_name,
parse_site_yaml,
render_templates,
run,
)
# Base URL that garage_admin() prefixes to every admin-API path. Can be
# overridden through the GARAGE_ADMIN_ENDPOINT environment variable.
GARAGE_ADMIN_ENDPOINT = os.environ.get(
    "GARAGE_ADMIN_ENDPOINT",
    "http://garage.storage.svc:3903",
)

# Cache-Control value stamped on every object uploaded by s3_sync().
CACHE_CONTROL = "public, max-age=0, must-revalidate"
def s3_sync(site_name, site_dir, excludes=None):
    """Mirror ``<site_dir>/build/html`` into the ``s3://<site_name>`` bucket.

    Two AWS CLI passes are issued through ``run``: ``s3 sync --delete`` to
    upload new/changed files and drop orphans, then ``s3 cp --recursive`` to
    re-stamp metadata on the objects that sync skipped.

    Args:
        site_name: Bucket name; used as ``s3://<site_name>/``.
        site_dir: Path-like site checkout (must support the ``/`` operator);
            ``build/html`` below it is what gets uploaded.
        excludes: Optional pattern, or list of patterns (site.yaml
            ``excludes:``), that should never be uploaded *and* never be
            deleted from the bucket — escape hatch for assets managed
            out-of-band (e.g. large PDFs uploaded via aws-cli).
    """
    endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT)
    html_dir = site_dir / "build" / "html"
    if not html_dir.exists():
        die(f"build/html not found — did the build step run? ({html_dir})")

    # Called only for their side effect: the return values are unused and
    # the AWS CLI reads the credentials from the environment itself.
    # NOTE(review): presumably env() aborts when the variable is unset —
    # confirm in utils.
    env("AWS_ACCESS_KEY_ID")
    env("AWS_SECRET_ACCESS_KEY")
    os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001")

    # A scalar `excludes: "*.pdf"` in site.yaml arrives as a plain string.
    # Iterating it would yield one --exclude per character (including a bare
    # '*', which excludes everything), so promote it to a one-element list.
    if isinstance(excludes, str):
        excludes = [excludes]
    exclude_flags = " ".join(
        f"--exclude {shlex.quote(str(pattern))}" for pattern in (excludes or [])
    )
    if excludes:
        print(f"Excluding patterns: {excludes}")
    print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}")

    # Quote every interpolated argument, not just the exclude patterns, so
    # paths or names containing spaces/shell metacharacters reach the CLI
    # intact. For ordinary values shlex.quote() returns its input unchanged.
    endpoint_arg = shlex.quote(endpoint)
    source_arg = shlex.quote(f"{html_dir}/")
    bucket_arg = shlex.quote(f"s3://{site_name}/")
    cache_arg = shlex.quote(CACHE_CONTROL)

    # `sync --delete` handles new/changed/orphaned files. `cp --recursive`
    # then re-uploads everything to refresh metadata (cache-control,
    # content-type) on objects sync skipped because nothing changed.
    # Cost: a no-op deploy still re-uploads every byte. Sites here are
    # small enough that that's free; correctness wins over throughput.
    # AWS CLI guesses Content-Type from file extension on local→S3 uploads,
    # so a fresh upload always carries the right MIME type.
    run(
        f"aws --endpoint-url {endpoint_arg} s3 sync {source_arg} {bucket_arg} "
        f"--delete --only-show-errors "
        f"--cache-control {cache_arg} "
        f"{exclude_flags}".rstrip()  # drop the trailing space when no excludes
    )
    print("Re-stamping metadata on all objects...")
    run(
        f"aws --endpoint-url {endpoint_arg} s3 cp {source_arg} {bucket_arg} "
        f"--recursive --only-show-errors "
        f"--cache-control {cache_arg} "
        f"{exclude_flags}".rstrip()
    )
def garage_admin(method, path, token, body=None):
    """Send one bearer-authenticated request to the Garage admin API.

    ``path`` is appended verbatim to ``GARAGE_ADMIN_ENDPOINT``. When ``body``
    is not None it is JSON-encoded and sent with a JSON content type.

    Returns the decoded JSON response, or ``{}`` when the response body is
    empty. Errors raised by ``urlopen`` propagate to the caller.
    """
    target = f"{GARAGE_ADMIN_ENDPOINT}{path}"

    request_headers = {"Authorization": f"Bearer {token}"}
    payload = None
    if body is not None:
        payload = json.dumps(body).encode()
        request_headers["Content-Type"] = "application/json"

    request = Request(
        target,
        data=payload,
        method=method,
        headers=request_headers,
    )
    with urlopen(request) as response:
        content = response.read()

    if not content:
        return {}
    return json.loads(content)
def ensure_bucket_aliases(site_name, aliases, admin_token):
    """Add each entry of ``aliases`` as a Garage globalAlias on the site bucket.

    Idempotent: skips aliases already present. Never removes aliases not in
    the desired set (safety — orphan removal is manual).

    Args:
        site_name: Global alias used to look the bucket up.
        aliases: Desired alias names; an empty/None value is a no-op.
        admin_token: Garage admin bearer token; when falsy the reconcile is
            skipped with a notice.

    Raises:
        HTTPError: Re-raised (after printing the response body) when the
            admin API rejects an alias addition.
    """
    if not aliases:
        return
    if not admin_token:
        print(" (no GARAGE_ADMIN_TOKEN — skipping bucket alias reconcile)")
        return

    # The lookup is best-effort: a failure only produces a warning.
    try:
        info = garage_admin("GET", f"/v2/GetBucketInfo?globalAlias={site_name}",
                            admin_token)
    except (HTTPError, URLError) as e:
        print(f" WARNING: bucket lookup failed: {e}")
        return

    bucket_id = info.get("id")
    if not bucket_id:
        # Without an id there is nothing to attach aliases to. Treat it like
        # a failed lookup instead of crashing on the None slice below.
        print(f" WARNING: bucket lookup for {site_name} returned no id"
              " — skipping bucket alias reconcile")
        return

    existing = set(info.get("globalAliases") or [])
    print(f" Bucket {site_name} ({bucket_id[:12]}…) currently aliases: {sorted(existing)}")

    for alias in aliases:
        if alias in existing:
            continue
        print(f" Adding globalAlias: {alias}")
        try:
            garage_admin("POST", "/v2/AddBucketAlias", admin_token,
                         {"bucketId": bucket_id, "globalAlias": alias})
        except HTTPError as e:
            # Surface the API's error payload before failing the deploy.
            body = e.read().decode(errors="replace") if hasattr(e, "read") else ""
            print(f" ERROR adding alias {alias}: {e} {body}")
            raise
        # Remember the alias so a duplicate entry in `aliases` is not
        # POSTed a second time.
        existing.add(alias)
def render_site_manifests(site_name, action_dir, app_dir, manifests_dir, cfg):
    """Regenerate the site's manifests from the current site.yaml values.

    Rendering runs unconditionally: the templates own domain + aliases, so
    a change in site.yaml propagates without manual edits.
    """
    # Make sure the output directory exists before anything is rendered.
    manifests_dir.mkdir(parents=True, exist_ok=True)

    context = dict(
        site=site_name,
        site_k8s=k8s_name(site_name),
        domain=cfg["domain"],
        aliases=cfg["aliases"],
        namespace=NAMESPACE,
    )
    render_templates(action_dir, context, app_dir, manifests_dir)
def deploy_static(site_name, site_dir, action_dir, token, cfg):
    """Publish a site: upload content, reconcile aliases, push manifests."""
    # 1. Upload the built HTML, honouring any site.yaml excludes.
    exclude_patterns = cfg.get("excludes")
    s3_sync(site_name, site_dir, excludes=exclude_patterns)

    # 2. Make sure the bucket answers to every configured alias.
    wanted_aliases = cfg["aliases"]
    admin_token = os.environ.get("GARAGE_ADMIN_TOKEN")
    ensure_bucket_aliases(site_name, wanted_aliases, admin_token)

    # 3. Re-render the manifests inside a checkout of the apps repo and push.
    checkout = clone_apps(token)
    site_app_dir = checkout / "sjc001" / "websites" / site_name
    render_site_manifests(
        site_name,
        action_dir,
        site_app_dir,
        site_app_dir / "manifests",
        cfg,
    )
    commit_and_push(checkout, f"Deploy {site_name}")
def decommission(site_name, token):
    """Remove the site's manifests from the apps repo and push the removal.

    Clones the apps repo into a temporary directory, deletes
    ``sjc001/websites/<site_name>`` and commits. Returns early, without
    committing, when that directory does not exist. The bucket itself is
    left untouched; the manual purge command is printed at the end.

    Args:
        site_name: Name of the site whose manifests are removed.
        token: Credential embedded in the clone URL for the CI bot user.
    """
    user = env("CI_BOT_USER", "ci-bot")
    with tempfile.TemporaryDirectory() as tmp:
        apps_dir = Path(tmp)

        # Quote every value interpolated into the commands so unusual
        # characters in the user name, token or temp path cannot split or
        # alter them. For ordinary values shlex.quote() returns its input
        # unchanged.
        # NOTE(review): assumes env() returns a str — confirm in utils.
        repo_arg = shlex.quote(str(apps_dir))
        url_arg = shlex.quote(
            f"https://{user}:{token}@{GITEA_HOST}/fritzlab/apps.git"
        )
        name_arg = shlex.quote(user)
        email_arg = shlex.quote(f"{user}@fritzlab.net")

        run(f"git clone --depth 1 {url_arg} {repo_arg}")
        site_path = apps_dir / "sjc001" / "websites" / site_name
        if not site_path.exists():
            print(f"No manifests for {site_name} — nothing to remove")
            return
        shutil.rmtree(site_path)

        # Set a commit identity in the fresh clone before committing.
        run(f"git -C {repo_arg} config user.name {name_arg}")
        run(f"git -C {repo_arg} config user.email {email_arg}")
        commit_and_push(apps_dir, f"Decommission {site_name}")

    print(f"Bucket {site_name} and its objects are NOT purged automatically.")
    print(f" garage bucket delete {site_name} --yes")
def cmd_deploy():
    """Entry point for the deploy command, configured via the environment.

    Reads SITE_REPO, SITE_DIR, ACTION_DIR and CI_BOT_TOKEN, parses
    site.yaml, then deploys the site — or decommissions it when site.yaml
    marks it as not enabled.
    """
    repo_slug = env("SITE_REPO")
    checkout = Path(env("SITE_DIR"))
    action_root = Path(env("ACTION_DIR"))
    bot_token = env("CI_BOT_TOKEN")

    # The site name is everything after the first "/" of SITE_REPO.
    site_name = repo_slug.split("/", 1)[1]
    site_cfg = parse_site_yaml(checkout)

    if site_cfg["enabled"]:
        deploy_static(site_name, checkout, action_root, bot_token, site_cfg)
    else:
        print("Site disabled — running decommission...")
        decommission(site_name, bot_token)